vendor files

Serguei Bezverkhi
2018-01-09 13:57:14 -05:00
parent 558bc6c02a
commit 7b24313bd6
16547 changed files with 4527373 additions and 0 deletions

103
vendor/k8s.io/kubernetes/pkg/controller/node/BUILD generated vendored Normal file

@@ -0,0 +1,103 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = ["nodecontroller_test.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node",
library = ":go_default_library",
deps = [
"//pkg/cloudprovider:go_default_library",
"//pkg/cloudprovider/providers/fake:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/node/ipam:go_default_library",
"//pkg/controller/node/scheduler:go_default_library",
"//pkg/controller/node/util:go_default_library",
"//pkg/controller/testutil:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/taints:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/informers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
"//vendor/k8s.io/client-go/testing:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"metrics.go",
"node_controller.go",
],
importpath = "k8s.io/kubernetes/pkg/controller/node",
deps = [
"//pkg/api/v1/node:go_default_library",
"//pkg/cloudprovider:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/node/ipam:go_default_library",
"//pkg/controller/node/ipam/sync:go_default_library",
"//pkg/controller/node/scheduler:go_default_library",
"//pkg/controller/node/util:go_default_library",
"//pkg/util/metrics:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/system:go_default_library",
"//pkg/util/taints:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/informers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/controller/node/ipam:all-srcs",
"//pkg/controller/node/scheduler:all-srcs",
"//pkg/controller/node/util:all-srcs",
],
tags = ["automanaged"],
)

9
vendor/k8s.io/kubernetes/pkg/controller/node/OWNERS generated vendored Executable file

@@ -0,0 +1,9 @@
approvers:
- gmarek
- bowei
reviewers:
- gmarek
- smarterclayton
- ingvagabund
- aveshagarwal
- k82cn

19
vendor/k8s.io/kubernetes/pkg/controller/node/doc.go generated vendored Normal file

@@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package node contains code for syncing cloud instances with
// the node registry.
package node // import "k8s.io/kubernetes/pkg/controller/node"


@@ -0,0 +1,90 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"controller_test.go",
"range_allocator_test.go",
"timeout_test.go",
],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam",
library = ":go_default_library",
deps = [
"//pkg/controller:go_default_library",
"//pkg/controller/node/ipam/cidrset:go_default_library",
"//pkg/controller/node/ipam/test:go_default_library",
"//pkg/controller/testutil:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"adapter.go",
"cidr_allocator.go",
"cloud_cidr_allocator.go",
"controller.go",
"doc.go",
"range_allocator.go",
"timeout.go",
],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam",
deps = [
"//pkg/api/v1/node:go_default_library",
"//pkg/cloudprovider:go_default_library",
"//pkg/cloudprovider/providers/gce:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/node/ipam/cidrset:go_default_library",
"//pkg/controller/node/ipam/sync:go_default_library",
"//pkg/controller/node/util:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/clientset_generated/clientset/scheme:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/controller/node/ipam/cidrset:all-srcs",
"//pkg/controller/node/ipam/sync:all-srcs",
"//pkg/controller/node/ipam/test:all-srcs",
],
tags = ["automanaged"],
)

7
vendor/k8s.io/kubernetes/pkg/controller/node/ipam/OWNERS generated vendored Executable file

@@ -0,0 +1,7 @@
approvers:
- bowei
- dnardo
reviewers:
- bowei
- dnardo
- freehan


@@ -0,0 +1,125 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"context"
"encoding/json"
"net"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/metrics/pkg/client/clientset_generated/clientset/scheme"
)
type adapter struct {
k8s clientset.Interface
cloud *gce.GCECloud
recorder record.EventRecorder
}
func newAdapter(k8s clientset.Interface, cloud *gce.GCECloud) *adapter {
ret := &adapter{
k8s: k8s,
cloud: cloud,
}
broadcaster := record.NewBroadcaster()
broadcaster.StartLogging(glog.Infof)
ret.recorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloudCIDRAllocator"})
glog.V(0).Infof("Sending events to api server.")
broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{
Interface: v1core.New(k8s.CoreV1().RESTClient()).Events(""),
})
return ret
}
func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
cidrs, err := a.cloud.AliasRanges(types.NodeName(nodeName))
if err != nil {
return nil, err
}
switch len(cidrs) {
case 0:
return nil, nil
case 1:
break
default:
glog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", nodeName, cidrs)
}
_, cidrRange, err := net.ParseCIDR(cidrs[0])
if err != nil {
return nil, err
}
return cidrRange, nil
}
func (a *adapter) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
return a.cloud.AddAliasToInstance(types.NodeName(nodeName), cidrRange)
}
func (a *adapter) Node(ctx context.Context, name string) (*v1.Node, error) {
return a.k8s.CoreV1().Nodes().Get(name, metav1.GetOptions{})
}
func (a *adapter) UpdateNodePodCIDR(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
patch := map[string]interface{}{
"apiVersion": node.APIVersion,
"kind": node.Kind,
"metadata": map[string]interface{}{"name": node.Name},
"spec": map[string]interface{}{"podCIDR": cidrRange.String()},
}
bytes, err := json.Marshal(patch)
if err != nil {
return err
}
_, err = a.k8s.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, bytes)
return err
}
func (a *adapter) UpdateNodeNetworkUnavailable(nodeName string, unavailable bool) error {
condition := v1.ConditionFalse
if unavailable {
condition = v1.ConditionTrue
}
return nodeutil.SetNodeCondition(a.k8s, types.NodeName(nodeName), v1.NodeCondition{
Type: v1.NodeNetworkUnavailable,
Status: condition,
Reason: "RouteCreated",
Message: "NodeController created an implicit route",
LastTransitionTime: metav1.Now(),
})
}
func (a *adapter) EmitNodeWarningEvent(nodeName, reason, fmt string, args ...interface{}) {
ref := &v1.ObjectReference{Kind: "Node", Name: nodeName}
a.recorder.Eventf(ref, v1.EventTypeNormal, reason, fmt, args...)
}
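As a side note on the patch above: a minimal sketch (editor-added, not part of the vendored file) of the strategic-merge patch document that UpdateNodePodCIDR builds and sends. The node name, apiVersion/kind values, and CIDR are made up for illustration; the JSON shape is what json.Marshal produces for the patch map.

package ipam

import (
    "encoding/json"
    "fmt"
    "net"
)

// ExamplePodCIDRPatch prints the patch body UpdateNodePodCIDR would send for
// a hypothetical node "node-1" with pod CIDR 10.100.4.0/24.
func ExamplePodCIDRPatch() {
    _, cidrRange, _ := net.ParseCIDR("10.100.4.0/24")
    patch := map[string]interface{}{
        "apiVersion": "v1",
        "kind":       "Node",
        "metadata":   map[string]interface{}{"name": "node-1"},
        "spec":       map[string]interface{}{"podCIDR": cidrRange.String()},
    }
    bytes, err := json.Marshal(patch)
    if err != nil {
        panic(err)
    }
    fmt.Println(string(bytes))
    // Output: {"apiVersion":"v1","kind":"Node","metadata":{"name":"node-1"},"spec":{"podCIDR":"10.100.4.0/24"}}
}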


@@ -0,0 +1,124 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"fmt"
"net"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
informers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/cloudprovider"
)
type nodeAndCIDR struct {
cidr *net.IPNet
nodeName string
}
// CIDRAllocatorType is the type of the allocator to use.
type CIDRAllocatorType string
const (
// RangeAllocatorType is the allocator that uses an internal CIDR
// range allocator to do node CIDR range allocations.
RangeAllocatorType CIDRAllocatorType = "RangeAllocator"
// CloudAllocatorType is the allocator that uses cloud platform
// support to do node CIDR range allocations.
CloudAllocatorType CIDRAllocatorType = "CloudAllocator"
// IPAMFromClusterAllocatorType uses the ipam controller, syncing the node
// CIDR range allocations from the cluster to the cloud.
IPAMFromClusterAllocatorType = "IPAMFromCluster"
// IPAMFromCloudAllocatorType uses the ipam controller, syncing the node
// CIDR range allocations from the cloud to the cluster.
IPAMFromCloudAllocatorType = "IPAMFromCloud"
)
// TODO: figure out the good setting for those constants.
const (
// apiserverStartupGracePeriod is how long the node controller polls the list
// nodes endpoint while waiting for the apiserver to come up.
apiserverStartupGracePeriod = 10 * time.Minute
// cidrUpdateWorkers is the number of NodeSpec updates the node controller can process concurrently.
cidrUpdateWorkers = 30
// cidrUpdateQueueSize is the maximum number of NodeSpec updates that can be enqueued.
cidrUpdateQueueSize = 5000
// cidrUpdateRetries is the number of times a NodeSpec update will be retried before it is dropped.
cidrUpdateRetries = 10
)
// CIDRAllocator is an interface implemented by things that know how
// to allocate/occupy/recycle CIDR for nodes.
type CIDRAllocator interface {
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
// CIDR if it doesn't currently have one, or marks the CIDR as used if
// the node already has one.
AllocateOrOccupyCIDR(node *v1.Node) error
// ReleaseCIDR releases the CIDR of the removed node
ReleaseCIDR(node *v1.Node) error
// Run starts all the working logic of the allocator.
Run(stopCh <-chan struct{})
}
// New creates a new CIDR range allocator.
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, allocatorType CIDRAllocatorType, clusterCIDR, serviceCIDR *net.IPNet, nodeCIDRMaskSize int) (CIDRAllocator, error) {
nodeList, err := listNodes(kubeClient)
if err != nil {
return nil, err
}
switch allocatorType {
case RangeAllocatorType:
return NewCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
case CloudAllocatorType:
return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
default:
return nil, fmt.Errorf("Invalid CIDR allocator type: %v", allocatorType)
}
}
func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
var nodeList *v1.NodeList
// We must poll because apiserver might not be up. This error causes
// controller manager to restart.
if pollErr := wait.Poll(10*time.Second, apiserverStartupGracePeriod, func() (bool, error) {
var err error
nodeList, err = kubeClient.CoreV1().Nodes().List(metav1.ListOptions{
FieldSelector: fields.Everything().String(),
LabelSelector: labels.Everything().String(),
})
if err != nil {
glog.Errorf("Failed to list all nodes: %v", err)
return false, nil
}
return true, nil
}); pollErr != nil {
return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map",
apiserverStartupGracePeriod)
}
return nodeList, nil
}
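To make the CIDRAllocator contract above concrete, here is a small illustrative stub (editor-added, not vendored code) that satisfies the interface with a single fixed range; real allocators patch the Node through the API server rather than mutating it in memory.

package ipam

import (
    "net"

    "k8s.io/api/core/v1"
)

// staticAllocator hands every node the same fixed range. It exists only to
// show the minimal surface a CIDRAllocator must provide.
type staticAllocator struct {
    cidr *net.IPNet
}

func (a *staticAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
    // The real allocators send a patch to the API server; this stub only
    // mutates the in-memory copy.
    node.Spec.PodCIDR = a.cidr.String()
    return nil
}

func (a *staticAllocator) ReleaseCIDR(node *v1.Node) error {
    // Nothing to recycle for a fixed range.
    return nil
}

func (a *staticAllocator) Run(stopCh <-chan struct{}) {
    // No background work; block until told to stop.
    <-stopCh
}

// Compile-time check that the stub satisfies the interface.
var _ CIDRAllocator = &staticAllocator{}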


@@ -0,0 +1,34 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = ["cidr_set_test.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam/cidrset",
library = ":go_default_library",
deps = ["//vendor/github.com/golang/glog:go_default_library"],
)
go_library(
name = "go_default_library",
srcs = ["cidr_set.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam/cidrset",
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)


@@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cidrset
import (
"encoding/binary"
"errors"
"fmt"
"math/big"
"math/bits"
"net"
"sync"
)
// CidrSet manages a set of CIDR ranges from which blocks of IPs can
// be allocated.
type CidrSet struct {
sync.Mutex
clusterCIDR *net.IPNet
clusterIP net.IP
clusterMaskSize int
maxCIDRs int
nextCandidate int
used big.Int
subNetMaskSize int
}
const (
// The subnet mask size cannot be greater than 16 more than the cluster mask size
// TODO: https://github.com/kubernetes/kubernetes/issues/44918
// clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap
clusterSubnetMaxDiff = 16
// halfIPv6Len is half the length of an IPv6 address, in bytes
halfIPv6Len = net.IPv6len / 2
)
var (
// ErrCIDRRangeNoCIDRsRemaining occurs when there is no more space
// to allocate CIDR ranges.
ErrCIDRRangeNoCIDRsRemaining = errors.New(
"CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range")
// ErrCIDRSetSubNetTooBig occurs when the subnet mask size is too
// big compared to the CIDR mask size.
ErrCIDRSetSubNetTooBig = errors.New(
"New CIDR set failed; the node CIDR size is too big")
)
// NewCIDRSet creates a new CidrSet.
func NewCIDRSet(clusterCIDR *net.IPNet, subNetMaskSize int) (*CidrSet, error) {
clusterMask := clusterCIDR.Mask
clusterMaskSize, _ := clusterMask.Size()
var maxCIDRs int
if (clusterCIDR.IP.To4() == nil) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) {
return nil, ErrCIDRSetSubNetTooBig
}
maxCIDRs = 1 << uint32(subNetMaskSize-clusterMaskSize)
return &CidrSet{
clusterCIDR: clusterCIDR,
clusterIP: clusterCIDR.IP,
clusterMaskSize: clusterMaskSize,
maxCIDRs: maxCIDRs,
subNetMaskSize: subNetMaskSize,
}, nil
}
func (s *CidrSet) indexToCIDRBlock(index int) *net.IPNet {
var ip []byte
var mask int
switch /*v4 or v6*/ {
case s.clusterIP.To4() != nil:
{
j := uint32(index) << uint32(32-s.subNetMaskSize)
ipInt := (binary.BigEndian.Uint32(s.clusterIP)) | j
ip = make([]byte, 4)
binary.BigEndian.PutUint32(ip, ipInt)
mask = 32
}
case s.clusterIP.To16() != nil:
{
// leftClusterIP | rightClusterIP
// 2001:0DB8:1234:0000:0000:0000:0000:0000
const v6NBits = 128
const halfV6NBits = v6NBits / 2
leftClusterIP := binary.BigEndian.Uint64(s.clusterIP[:halfIPv6Len])
rightClusterIP := binary.BigEndian.Uint64(s.clusterIP[halfIPv6Len:])
leftIP, rightIP := make([]byte, halfIPv6Len), make([]byte, halfIPv6Len)
if s.subNetMaskSize <= halfV6NBits {
// We only care about left side IP
leftClusterIP |= uint64(index) << uint(halfV6NBits-s.subNetMaskSize)
} else {
if s.clusterMaskSize < halfV6NBits {
// see how many bits are needed to reach the left side
btl := uint(s.subNetMaskSize - halfV6NBits)
indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index)))
if indexMaxBit > btl {
leftClusterIP |= uint64(index) >> btl
}
}
// the right side is calculated the same way regardless of whether the
// index also spills into the left half of the address
rightClusterIP |= uint64(index) << uint(v6NBits-s.subNetMaskSize)
}
binary.BigEndian.PutUint64(leftIP, leftClusterIP)
binary.BigEndian.PutUint64(rightIP, rightClusterIP)
ip = append(leftIP, rightIP...)
mask = 128
}
}
return &net.IPNet{
IP: ip,
Mask: net.CIDRMask(s.subNetMaskSize, mask),
}
}
// AllocateNext allocates the next free CIDR range. This will set the range
// as occupied and return the allocated range.
func (s *CidrSet) AllocateNext() (*net.IPNet, error) {
s.Lock()
defer s.Unlock()
nextUnused := -1
for i := 0; i < s.maxCIDRs; i++ {
candidate := (i + s.nextCandidate) % s.maxCIDRs
if s.used.Bit(candidate) == 0 {
nextUnused = candidate
break
}
}
if nextUnused == -1 {
return nil, ErrCIDRRangeNoCIDRsRemaining
}
s.nextCandidate = (nextUnused + 1) % s.maxCIDRs
s.used.SetBit(&s.used, nextUnused, 1)
return s.indexToCIDRBlock(nextUnused), nil
}
func (s *CidrSet) getBeginningAndEndIndices(cidr *net.IPNet) (begin, end int, err error) {
if cidr == nil {
return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.clusterCIDR)
}
begin, end = 0, s.maxCIDRs-1
cidrMask := cidr.Mask
maskSize, _ := cidrMask.Size()
var ipSize int
if !s.clusterCIDR.Contains(cidr.IP.Mask(s.clusterCIDR.Mask)) && !cidr.Contains(s.clusterCIDR.IP.Mask(cidr.Mask)) {
return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.clusterCIDR)
}
if s.clusterMaskSize < maskSize {
ipSize = net.IPv4len
if cidr.IP.To4() == nil {
ipSize = net.IPv6len
}
subNetMask := net.CIDRMask(s.subNetMaskSize, ipSize*8)
begin, err = s.getIndexForCIDR(&net.IPNet{
IP: cidr.IP.Mask(subNetMask),
Mask: subNetMask,
})
if err != nil {
return -1, -1, err
}
ip := make([]byte, ipSize)
if cidr.IP.To4() != nil {
ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask))
binary.BigEndian.PutUint32(ip, ipInt)
} else {
// ipIntLeft | ipIntRight
// 2001:0DB8:1234:0000:0000:0000:0000:0000
ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2]))
ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:]))
binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft)
binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight)
}
end, err = s.getIndexForCIDR(&net.IPNet{
IP: net.IP(ip).Mask(subNetMask),
Mask: subNetMask,
})
if err != nil {
return -1, -1, err
}
}
return begin, end, nil
}
// Release releases the given CIDR range.
func (s *CidrSet) Release(cidr *net.IPNet) error {
begin, end, err := s.getBeginningAndEndIndices(cidr)
if err != nil {
return err
}
s.Lock()
defer s.Unlock()
for i := begin; i <= end; i++ {
s.used.SetBit(&s.used, i, 0)
}
return nil
}
// Occupy marks the given CIDR range as used. Occupy does not check if the CIDR
// range was previously used.
func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) {
begin, end, err := s.getBeginningAndEndIndices(cidr)
if err != nil {
return err
}
s.Lock()
defer s.Unlock()
for i := begin; i <= end; i++ {
s.used.SetBit(&s.used, i, 1)
}
return nil
}
func (s *CidrSet) getIndexForCIDR(cidr *net.IPNet) (int, error) {
return s.getIndexForIP(cidr.IP)
}
func (s *CidrSet) getIndexForIP(ip net.IP) (int, error) {
if ip.To4() != nil {
cidrIndex := (binary.BigEndian.Uint32(s.clusterIP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.subNetMaskSize)
if cidrIndex >= uint32(s.maxCIDRs) {
return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.subNetMaskSize)
}
return int(cidrIndex), nil
}
if ip.To16() != nil {
bigIP := big.NewInt(0).SetBytes(s.clusterIP)
bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip))
cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.subNetMaskSize))
cidrIndex := cidrIndexBig.Uint64()
if cidrIndex >= uint64(s.maxCIDRs) {
return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.subNetMaskSize)
}
return int(cidrIndex), nil
}
return 0, fmt.Errorf("invalid IP: %v", ip)
}
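For orientation, a short usage sketch (editor-added, not vendored code) of the CidrSet API defined above: carve /24 node ranges out of a /16 cluster CIDR, reserve an externally managed range with Occupy, and hand one back with Release.

package cidrset

import (
    "fmt"
    "net"
)

// ExampleCidrSet allocates two node ranges, reserves a third that is managed
// elsewhere, and releases the first one back to the pool.
func ExampleCidrSet() {
    _, clusterCIDR, _ := net.ParseCIDR("10.0.0.0/16")
    set, err := NewCIDRSet(clusterCIDR, 24)
    if err != nil {
        panic(err)
    }
    first, _ := set.AllocateNext()  // 10.0.0.0/24
    second, _ := set.AllocateNext() // 10.0.1.0/24
    // Mark a range that is managed elsewhere as used so it is never handed out.
    _, reserved, _ := net.ParseCIDR("10.0.5.0/24")
    _ = set.Occupy(reserved)
    // Give the first range back to the pool.
    _ = set.Release(first)
    fmt.Println(first, second)
    // Output: 10.0.0.0/24 10.0.1.0/24
}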


@@ -0,0 +1,733 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cidrset
import (
"math/big"
"net"
"reflect"
"testing"
"github.com/golang/glog"
)
func TestCIDRSetFullyAllocated(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
expectedCIDR string
description string
}{
{
clusterCIDRStr: "127.123.234.0/30",
subNetMaskSize: 30,
expectedCIDR: "127.123.234.0/30",
description: "Fully allocated CIDR with IPv4",
},
{
clusterCIDRStr: "beef:1234::/30",
subNetMaskSize: 30,
expectedCIDR: "beef:1234::/30",
description: "Fully allocated CIDR with IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr)
a, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
p, err := a.AllocateNext()
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
if p.String() != tc.expectedCIDR {
t.Fatalf("unexpected allocated cidr: %v, expecting %v for %v",
p.String(), tc.expectedCIDR, tc.description)
}
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
a.Release(p)
p, err = a.AllocateNext()
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
if p.String() != tc.expectedCIDR {
t.Fatalf("unexpected allocated cidr: %v, expecting %v for %v",
p.String(), tc.expectedCIDR, tc.description)
}
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
}
}
func TestIndexToCIDRBlock(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subnetMaskSize int
index int
CIDRBlock string
description string
}{
{
clusterCIDRStr: "127.123.3.0/16",
subnetMaskSize: 24,
index: 0,
CIDRBlock: "127.123.0.0/24",
description: "1st IP address indexed with IPv4",
},
{
clusterCIDRStr: "127.123.0.0/16",
subnetMaskSize: 24,
index: 15,
CIDRBlock: "127.123.15.0/24",
description: "16th IP address indexed with IPv4",
},
{
clusterCIDRStr: "192.168.5.219/28",
subnetMaskSize: 32,
index: 5,
CIDRBlock: "192.168.5.213/32",
description: "5th IP address indexed with IPv4",
},
{
clusterCIDRStr: "2001:0db8:1234:3::/48",
subnetMaskSize: 64,
index: 0,
CIDRBlock: "2001:db8:1234::/64",
description: "1st IP address indexed with IPv6 /64",
},
{
clusterCIDRStr: "2001:0db8:1234::/48",
subnetMaskSize: 64,
index: 15,
CIDRBlock: "2001:db8:1234:f::/64",
description: "16th IP address indexed with IPv6 /64",
},
{
clusterCIDRStr: "2001:0db8:85a3::8a2e:0370:7334/50",
subnetMaskSize: 63,
index: 6425,
CIDRBlock: "2001:db8:85a3:3232::/63",
description: "6426th IP address indexed with IPv6 /63",
},
{
clusterCIDRStr: "2001:0db8::/32",
subnetMaskSize: 48,
index: 0,
CIDRBlock: "2001:db8::/48",
description: "1st IP address indexed with IPv6 /48",
},
{
clusterCIDRStr: "2001:0db8::/32",
subnetMaskSize: 48,
index: 15,
CIDRBlock: "2001:db8:f::/48",
description: "16th IP address indexed with IPv6 /48",
},
{
clusterCIDRStr: "2001:0db8:85a3::8a2e:0370:7334/32",
subnetMaskSize: 48,
index: 6425,
CIDRBlock: "2001:db8:1919::/48",
description: "6426th IP address indexed with IPv6 /48",
},
{
clusterCIDRStr: "2001:0db8:1234:ff00::/56",
subnetMaskSize: 72,
index: 0,
CIDRBlock: "2001:db8:1234:ff00::/72",
description: "1st IP address indexed with IPv6 /72",
},
{
clusterCIDRStr: "2001:0db8:1234:ff00::/56",
subnetMaskSize: 72,
index: 15,
CIDRBlock: "2001:db8:1234:ff00:f00::/72",
description: "16th IP address indexed with IPv6 /72",
},
{
clusterCIDRStr: "2001:0db8:1234:ff00::0370:7334/56",
subnetMaskSize: 72,
index: 6425,
CIDRBlock: "2001:db8:1234:ff19:1900::/72",
description: "6426th IP address indexed with IPv6 /72",
},
{
clusterCIDRStr: "2001:0db8:1234:0:1234::/80",
subnetMaskSize: 96,
index: 0,
CIDRBlock: "2001:db8:1234:0:1234::/96",
description: "1st IP address indexed with IPv6 /96",
},
{
clusterCIDRStr: "2001:0db8:1234:0:1234::/80",
subnetMaskSize: 96,
index: 15,
CIDRBlock: "2001:db8:1234:0:1234:f::/96",
description: "16th IP address indexed with IPv6 /96",
},
{
clusterCIDRStr: "2001:0db8:1234:ff00::0370:7334/80",
subnetMaskSize: 96,
index: 6425,
CIDRBlock: "2001:db8:1234:ff00:0:1919::/96",
description: "6426th IP address indexed with IPv6 /96",
},
}
for _, tc := range cases {
_, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr)
a, err := NewCIDRSet(clusterCIDR, tc.subnetMaskSize)
if err != nil {
t.Fatalf("error for %v ", tc.description)
}
cidr := a.indexToCIDRBlock(tc.index)
if cidr.String() != tc.CIDRBlock {
t.Fatalf("error for %v index %d %s", tc.description, tc.index, cidr.String())
}
}
}
func TestCIDRSet_RandomishAllocation(t *testing.T) {
cases := []struct {
clusterCIDRStr string
description string
}{
{
clusterCIDRStr: "127.123.234.0/16",
description: "RandomishAllocation with IPv4",
},
{
clusterCIDRStr: "beef:1234::/16",
description: "RandomishAllocation with IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr)
a, err := NewCIDRSet(clusterCIDR, 24)
if err != nil {
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
// allocate all the CIDRs
var cidrs []*net.IPNet
for i := 0; i < 256; i++ {
if c, err := a.AllocateNext(); err == nil {
cidrs = append(cidrs, c)
} else {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
}
//var err error
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
// release them all
for i := 0; i < len(cidrs); i++ {
a.Release(cidrs[i])
}
// allocate the CIDRs again
var rcidrs []*net.IPNet
for i := 0; i < 256; i++ {
if c, err := a.AllocateNext(); err == nil {
rcidrs = append(rcidrs, c)
} else {
t.Fatalf("unexpected error: %d, %v for %v", i, err, tc.description)
}
}
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
if !reflect.DeepEqual(cidrs, rcidrs) {
t.Fatalf("expected re-allocated cidrs are the same collection for %v", tc.description)
}
}
}
func TestCIDRSet_AllocationOccupied(t *testing.T) {
cases := []struct {
clusterCIDRStr string
description string
}{
{
clusterCIDRStr: "127.123.234.0/16",
description: "AllocationOccupied with IPv4",
},
{
clusterCIDRStr: "beef:1234::/16",
description: "AllocationOccupied with IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr)
a, err := NewCIDRSet(clusterCIDR, 24)
if err != nil {
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
// allocate all the CIDRs
var cidrs []*net.IPNet
var numCIDRs = 256
for i := 0; i < numCIDRs; i++ {
if c, err := a.AllocateNext(); err == nil {
cidrs = append(cidrs, c)
} else {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
}
//var err error
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
// release them all
for i := 0; i < len(cidrs); i++ {
a.Release(cidrs[i])
}
// occupy the last 128 CIDRs
for i := numCIDRs / 2; i < numCIDRs; i++ {
a.Occupy(cidrs[i])
}
// allocate the first 128 CIDRs again
var rcidrs []*net.IPNet
for i := 0; i < numCIDRs/2; i++ {
if c, err := a.AllocateNext(); err == nil {
rcidrs = append(rcidrs, c)
} else {
t.Fatalf("unexpected error: %d, %v for %v", i, err, tc.description)
}
}
_, err = a.AllocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range for %v", tc.description)
}
// check Occupy() work properly
for i := numCIDRs / 2; i < numCIDRs; i++ {
rcidrs = append(rcidrs, cidrs[i])
}
if !reflect.DeepEqual(cidrs, rcidrs) {
t.Fatalf("expected re-allocated cidrs are the same collection for %v", tc.description)
}
}
}
func TestGetBitforCIDR(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
subNetCIDRStr string
expectedBit int
expectErr bool
description string
}{
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/16",
expectedBit: 0,
expectErr: false,
description: "Get 0 Bit with IPv4",
},
{
clusterCIDRStr: "be00::/8",
subNetMaskSize: 16,
subNetCIDRStr: "be00::/16",
expectedBit: 0,
expectErr: false,
description: "Get 0 Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.123.0.0/16",
expectedBit: 123,
expectErr: false,
description: "Get 123rd Bit with IPv4",
},
{
clusterCIDRStr: "be00::/8",
subNetMaskSize: 16,
subNetCIDRStr: "beef::/16",
expectedBit: 0xef,
expectErr: false,
description: "Get xef Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.168.0.0/16",
expectedBit: 168,
expectErr: false,
description: "Get 168th Bit with IPv4",
},
{
clusterCIDRStr: "be00::/8",
subNetMaskSize: 16,
subNetCIDRStr: "be68::/16",
expectedBit: 0x68,
expectErr: false,
description: "Get x68th Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.224.0.0/16",
expectedBit: 224,
expectErr: false,
description: "Get 224th Bit with IPv4",
},
{
clusterCIDRStr: "be00::/8",
subNetMaskSize: 16,
subNetCIDRStr: "be24::/16",
expectedBit: 0x24,
expectErr: false,
description: "Get x24th Bit with IPv6",
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "192.168.12.0/24",
expectedBit: 12,
expectErr: false,
description: "Get 12th Bit with IPv4",
},
{
clusterCIDRStr: "beef::/16",
subNetMaskSize: 24,
subNetCIDRStr: "beef:1200::/24",
expectedBit: 0x12,
expectErr: false,
description: "Get x12th Bit with IPv6",
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "192.168.151.0/24",
expectedBit: 151,
expectErr: false,
description: "Get 151st Bit with IPv4",
},
{
clusterCIDRStr: "beef::/16",
subNetMaskSize: 24,
subNetCIDRStr: "beef:9700::/24",
expectedBit: 0x97,
expectErr: false,
description: "Get x97st Bit with IPv6",
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "127.168.224.0/24",
expectErr: true,
description: "Get error with IPv4",
},
{
clusterCIDRStr: "beef::/16",
subNetMaskSize: 24,
subNetCIDRStr: "2001:db00::/24",
expectErr: true,
description: "Get error with IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
_, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
glog.Errorf("expected error but got null for %v", tc.description)
continue
}
if err != nil && !tc.expectErr {
glog.Errorf("unexpected error: %v for %v", err, tc.description)
continue
}
if got != tc.expectedBit {
glog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
}
}
}
func TestOccupy(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
subNetCIDRStr string
expectedUsedBegin int
expectedUsedEnd int
expectErr bool
description string
}{
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/8",
expectedUsedBegin: 0,
expectedUsedEnd: 255,
expectErr: false,
description: "Occupy all Bits with IPv4",
},
{
clusterCIDRStr: "2001:beef:1200::/40",
subNetMaskSize: 48,
subNetCIDRStr: "2001:beef:1200::/40",
expectedUsedBegin: 0,
expectedUsedEnd: 255,
expectErr: false,
description: "Occupy all Bits with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/2",
expectedUsedBegin: 0,
expectedUsedEnd: 255,
expectErr: false,
description: "Occupy every Bit with IPv4",
},
{
clusterCIDRStr: "2001:beef:1200::/40",
subNetMaskSize: 48,
subNetCIDRStr: "2001:beef:1234::/34",
expectedUsedBegin: 0,
expectedUsedEnd: 255,
expectErr: false,
description: "Occupy every Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/16",
expectedUsedBegin: 0,
expectedUsedEnd: 0,
expectErr: false,
description: "Occupy 1st Bit with IPv4",
},
{
clusterCIDRStr: "2001:beef:1200::/40",
subNetMaskSize: 48,
subNetCIDRStr: "2001:beef:1200::/48",
expectedUsedBegin: 0,
expectedUsedEnd: 0,
expectErr: false,
description: "Occupy 1st Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 32,
subNetCIDRStr: "127.0.0.0/16",
expectedUsedBegin: 0,
expectedUsedEnd: 65535,
expectErr: false,
description: "Occupy 65535 Bits with IPv4",
},
{
clusterCIDRStr: "2001:beef:1200::/48",
subNetMaskSize: 64,
subNetCIDRStr: "2001:beef:1200::/48",
expectedUsedBegin: 0,
expectedUsedEnd: 65535,
expectErr: false,
description: "Occupy 65535 Bits with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 256,
expectedUsedEnd: 257,
expectErr: false,
description: "Occupy 257th Bit with IPv4",
},
{
clusterCIDRStr: "2001:beef:7f00::/39",
subNetMaskSize: 48,
subNetCIDRStr: "2001:beef:7f00::/47",
expectedUsedBegin: 256,
expectedUsedEnd: 257,
expectErr: false,
description: "Occupy 257th Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 15,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 128,
expectedUsedEnd: 128,
expectErr: false,
description: "Occupy 128th Bit with IPv4",
},
{
clusterCIDRStr: "2001:beef:7f00::/39",
subNetMaskSize: 47,
subNetCIDRStr: "2001:beef:7f00::/47",
expectedUsedBegin: 128,
expectedUsedEnd: 128,
expectErr: false,
description: "Occupy 128th Bit with IPv6",
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 18,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 1024,
expectedUsedEnd: 1031,
expectErr: false,
description: "Occupy 1031st Bit with IPv4",
},
{
clusterCIDRStr: "2001:beef:7f00::/39",
subNetMaskSize: 50,
subNetCIDRStr: "2001:beef:7f00::/47",
expectedUsedBegin: 1024,
expectedUsedEnd: 1031,
expectErr: false,
description: "Occupy 1031st Bit with IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
_, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
err = cs.Occupy(subnetCIDR)
if err == nil && tc.expectErr {
t.Errorf("expected error but got none for %v", tc.description)
continue
}
if err != nil && !tc.expectErr {
t.Errorf("unexpected error: %v for %v", err, tc.description)
continue
}
expectedUsed := big.Int{}
for i := tc.expectedUsedBegin; i <= tc.expectedUsedEnd; i++ {
expectedUsed.SetBit(&expectedUsed, i, 1)
}
if expectedUsed.Cmp(&cs.used) != 0 {
t.Errorf("error for %v", tc.description)
}
}
}
func TestCIDRSetv6(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
expectedCIDR string
expectedCIDR2 string
expectErr bool
description string
}{
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 32,
expectErr: false,
expectedCIDR: "127.0.0.0/32",
expectedCIDR2: "127.0.0.1/32",
description: "Max cluster subnet size with IPv4",
},
{
clusterCIDRStr: "beef:1234::/32",
subNetMaskSize: 49,
expectErr: true,
description: "Max cluster subnet size with IPv6",
},
{
clusterCIDRStr: "2001:beef:1234:369b::/60",
subNetMaskSize: 64,
expectedCIDR: "2001:beef:1234:3690::/64",
expectedCIDR2: "2001:beef:1234:3691::/64",
expectErr: false,
description: "Allocate a few IPv6",
},
}
for _, tc := range cases {
_, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr)
a, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
if tc.expectErr {
continue
}
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
p, err := a.AllocateNext()
if err == nil && tc.expectErr {
t.Errorf("expected error but got none for %v", tc.description)
continue
}
if err != nil && !tc.expectErr {
t.Errorf("unexpected error: %v for %v", err, tc.description)
continue
}
if !tc.expectErr {
if p.String() != tc.expectedCIDR {
t.Fatalf("unexpected allocated cidr: %s for %v", p.String(), tc.description)
}
}
p2, err := a.AllocateNext()
if !tc.expectErr {
if p2.String() != tc.expectedCIDR2 {
t.Fatalf("unexpected allocated cidr: %s for %v", p2.String(), tc.description)
}
}
}
}


@@ -0,0 +1,262 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"fmt"
"net"
"sync"
"github.com/golang/glog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
informers "k8s.io/client-go/informers/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
v1node "k8s.io/kubernetes/pkg/api/v1/node"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/node/util"
utilnode "k8s.io/kubernetes/pkg/util/node"
)
// cloudCIDRAllocator allocates node CIDRs according to IP address aliases
// assigned by the cloud provider. In this case, the allocation and
// deallocation is delegated to the external provider, and the controller
// merely takes the assignment and updates the node spec.
type cloudCIDRAllocator struct {
client clientset.Interface
cloud *gce.GCECloud
// nodeLister is able to list/get nodes and is populated by the shared informer passed to
// NewCloudCIDRAllocator.
nodeLister corelisters.NodeLister
// nodesSynced returns true if the node shared informer has been synced at least once.
nodesSynced cache.InformerSynced
// Channel that is used to pass updating Nodes to the background.
// This increases the throughput of CIDR assignment by parallelization
// and not blocking on long operations (which shouldn't be done from
// event handlers anyway).
nodeUpdateChannel chan string
recorder record.EventRecorder
// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation
lock sync.Mutex
nodesInProcessing sets.String
}
var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
if client == nil {
glog.Fatalf("kubeClient is nil when starting NodeController")
}
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cidrAllocator"})
eventBroadcaster.StartLogging(glog.Infof)
glog.V(0).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")})
gceCloud, ok := cloud.(*gce.GCECloud)
if !ok {
err := fmt.Errorf("cloudCIDRAllocator does not support %v provider", cloud.ProviderName())
return nil, err
}
ca := &cloudCIDRAllocator{
client: client,
cloud: gceCloud,
nodeLister: nodeInformer.Lister(),
nodesSynced: nodeInformer.Informer().HasSynced,
nodeUpdateChannel: make(chan string, cidrUpdateQueueSize),
recorder: recorder,
nodesInProcessing: sets.NewString(),
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
if newNode.Spec.PodCIDR == "" {
return ca.AllocateOrOccupyCIDR(newNode)
}
// Even if PodCIDR is assigned, but NetworkUnavailable condition is
// set to true, we need to process the node to set the condition.
_, cond := v1node.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
if cond == nil || cond.Status != v1.ConditionFalse {
return ca.AllocateOrOccupyCIDR(newNode)
}
return nil
}),
DeleteFunc: util.CreateDeleteNodeHandler(ca.ReleaseCIDR),
})
glog.V(0).Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
return ca, nil
}
func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
glog.Infof("Starting cloud CIDR allocator")
defer glog.Infof("Shutting down cloud CIDR allocator")
if !controller.WaitForCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go ca.worker(stopCh)
}
<-stopCh
}
func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
for {
select {
case workItem, ok := <-ca.nodeUpdateChannel:
if !ok {
glog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return
}
if err := ca.updateCIDRAllocation(workItem); err != nil {
// Requeue the failed node for update again.
ca.nodeUpdateChannel <- workItem
}
case <-stopChan:
return
}
}
}
func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
ca.lock.Lock()
defer ca.lock.Unlock()
if ca.nodesInProcessing.Has(nodeName) {
return false
}
ca.nodesInProcessing.Insert(nodeName)
return true
}
func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
ca.lock.Lock()
defer ca.lock.Unlock()
ca.nodesInProcessing.Delete(nodeName)
}
// WARNING: If you add any return paths or defer any more work from this
// function, make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
if node == nil {
return nil
}
if !ca.insertNodeToProcessing(node.Name) {
glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
return nil
}
glog.V(4).Infof("Putting node %s into the work queue", node.Name)
ca.nodeUpdateChannel <- node.Name
return nil
}
// updateCIDRAllocation assigns a CIDR to the Node and sends an update to the API server.
func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
defer ca.removeNodeFromProcessing(nodeName)
// Fetch the node up front so the error paths below have a non-nil node to report against.
node, err := ca.nodeLister.Get(nodeName)
if err != nil {
return fmt.Errorf("failed to get node %v for CIDR assignment: %v", nodeName, err)
}
cidrs, err := ca.cloud.AliasRanges(types.NodeName(nodeName))
if err != nil {
util.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: %v", err)
}
if len(cidrs) == 0 {
util.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: Node %v has no CIDRs", node.Name)
}
_, cidr, err := net.ParseCIDR(cidrs[0])
if err != nil {
return fmt.Errorf("failed to parse string '%s' as a CIDR: %v", cidrs[0], err)
}
podCIDR := cidr.String()
for rep := 0; rep < cidrUpdateRetries; rep++ {
node, err = ca.nodeLister.Get(nodeName)
if err != nil {
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", nodeName, err)
continue
}
if node.Spec.PodCIDR != "" {
if node.Spec.PodCIDR == podCIDR {
glog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, podCIDR)
// Don't return here; fall through so the NetworkUnavailable condition below is still set if needed.
break
}
glog.Errorf("PodCIDR being reassigned! Node %v spec has %v, but cloud provider has assigned %v",
node.Name, node.Spec.PodCIDR, podCIDR)
// We fall through and set the CIDR despite this error. This
// implements the same logic as implemented in the
// rangeAllocator.
//
// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248
}
if err = utilnode.PatchNodeCIDR(ca.client, types.NodeName(node.Name), podCIDR); err == nil {
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
break
}
glog.Errorf("Failed to update node %v PodCIDR to %v (%d retries left): %v", node.Name, podCIDR, cidrUpdateRetries-rep-1, err)
}
if err != nil {
util.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
glog.Errorf("CIDR assignment for node %v failed: %v.", nodeName, err)
return err
}
err = utilnode.SetNodeCondition(ca.client, types.NodeName(node.Name), v1.NodeCondition{
Type: v1.NodeNetworkUnavailable,
Status: v1.ConditionFalse,
Reason: "RouteCreated",
Message: "NodeController created an implicit route",
LastTransitionTime: metav1.Now(),
})
if err != nil {
glog.Errorf("Error setting route status for node %v: %v", node.Name, err)
}
return err
}
func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
glog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
node.Name, node.Spec.PodCIDR)
return nil
}
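The nodesInProcessing guard above is what keeps a node from being queued twice while an update is in flight. A simplified, self-contained illustration (editor-added, not vendored code) of the same idea, using a plain map instead of sets.String:

package ipam

import "sync"

// inFlight admits a node name to the work queue at most once until its
// update completes.
type inFlight struct {
    mu    sync.Mutex
    names map[string]bool
}

// tryAdd reports whether the caller should enqueue the node; it returns false
// when an update for that node is already in progress.
func (f *inFlight) tryAdd(name string) bool {
    f.mu.Lock()
    defer f.mu.Unlock()
    if f.names == nil {
        f.names = make(map[string]bool)
    }
    if f.names[name] {
        return false
    }
    f.names[name] = true
    return true
}

// done clears the marker once the update has finished, allowing the node to
// be queued again.
func (f *inFlight) done(name string) {
    f.mu.Lock()
    defer f.mu.Unlock()
    delete(f.names, name)
}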


@@ -0,0 +1,215 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"fmt"
"net"
"sync"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
informers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
nodesync "k8s.io/kubernetes/pkg/controller/node/ipam/sync"
"k8s.io/kubernetes/pkg/controller/node/util"
)
// Config for the IPAM controller.
type Config struct {
// Resync is the default timeout duration when there are no errors.
Resync time.Duration
// MaxBackoff is the maximum timeout when in a error backoff state.
MaxBackoff time.Duration
// InitialRetry is the initial retry interval when an error is reported.
InitialRetry time.Duration
// Mode to use to synchronize.
Mode nodesync.NodeSyncMode
}
// Controller is the controller for synchronizing cluster and cloud node
// pod CIDR range assignments.
type Controller struct {
config *Config
adapter *adapter
lock sync.Mutex
syncers map[string]*nodesync.NodeSync
set *cidrset.CidrSet
}
// NewController returns a new instance of the IPAM controller.
func NewController(
config *Config,
kubeClient clientset.Interface,
cloud cloudprovider.Interface,
clusterCIDR, serviceCIDR *net.IPNet,
nodeCIDRMaskSize int) (*Controller, error) {
if !nodesync.IsValidMode(config.Mode) {
return nil, fmt.Errorf("invalid IPAM controller mode %q", config.Mode)
}
gceCloud, ok := cloud.(*gce.GCECloud)
if !ok {
return nil, fmt.Errorf("cloud IPAM controller does not support %q provider", cloud.ProviderName())
}
set, err := cidrset.NewCIDRSet(clusterCIDR, nodeCIDRMaskSize)
if err != nil {
return nil, err
}
c := &Controller{
config: config,
adapter: newAdapter(kubeClient, gceCloud),
syncers: make(map[string]*nodesync.NodeSync),
set: set,
}
if err := occupyServiceCIDR(c.set, clusterCIDR, serviceCIDR); err != nil {
return nil, err
}
return c, nil
}
// Start initializes the Controller with the existing list of nodes and
// registers the informers for node changes. This will start synchronization
// of the node and cloud CIDR range allocations.
func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
glog.V(0).Infof("Starting IPAM controller (config=%+v)", c.config)
nodes, err := listNodes(c.adapter.k8s)
if err != nil {
return err
}
for _, node := range nodes.Items {
if node.Spec.PodCIDR != "" {
_, cidrRange, err := net.ParseCIDR(node.Spec.PodCIDR)
if err == nil {
c.set.Occupy(cidrRange)
glog.V(3).Infof("Occupying CIDR for node %q (%v)", node.Name, node.Spec.PodCIDR)
} else {
glog.Errorf("Node %q has an invalid CIDR (%q): %v", node.Name, node.Spec.PodCIDR, err)
}
}
func() {
c.lock.Lock()
defer c.lock.Unlock()
// XXX/bowei -- stagger the start of each sync cycle.
syncer := c.newSyncer(node.Name)
c.syncers[node.Name] = syncer
go syncer.Loop(nil)
}()
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(c.onAdd),
UpdateFunc: util.CreateUpdateNodeHandler(c.onUpdate),
DeleteFunc: util.CreateDeleteNodeHandler(c.onDelete),
})
return nil
}
// occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
// intersects.
func occupyServiceCIDR(set *cidrset.CidrSet, clusterCIDR, serviceCIDR *net.IPNet) error {
if clusterCIDR.Contains(serviceCIDR.IP) || serviceCIDR.Contains(clusterCIDR.IP) {
if err := set.Occupy(serviceCIDR); err != nil {
return err
}
}
return nil
}
type nodeState struct {
t Timeout
}
func (ns *nodeState) ReportResult(err error) {
ns.t.Update(err == nil)
}
func (ns *nodeState) ResyncTimeout() time.Duration {
return ns.t.Next()
}
func (c *Controller) newSyncer(name string) *nodesync.NodeSync {
ns := &nodeState{
Timeout{
Resync: c.config.Resync,
MaxBackoff: c.config.MaxBackoff,
InitialRetry: c.config.InitialRetry,
},
}
return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
}
func (c *Controller) onAdd(node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
if syncer, ok := c.syncers[node.Name]; !ok {
syncer = c.newSyncer(node.Name)
c.syncers[node.Name] = syncer
go syncer.Loop(nil)
} else {
glog.Warningf("Add for node %q that already exists", node.Name)
syncer.Update(node)
}
return nil
}
func (c *Controller) onUpdate(_, node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
if sync, ok := c.syncers[node.Name]; ok {
sync.Update(node)
} else {
glog.Errorf("Received update for non-existant node %q", node.Name)
return fmt.Errorf("unknown node %q", node.Name)
}
return nil
}
func (c *Controller) onDelete(node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
if syncer, ok := c.syncers[node.Name]; ok {
syncer.Delete(node)
delete(c.syncers, node.Name)
} else {
glog.Warning("Node %q was already deleted", node.Name)
}
return nil
}
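For context, a hypothetical sketch (editor-added, not vendored code) of how the Config fields above fit together. The durations are arbitrary assumptions, and the sync-mode constant name is assumed from the ipam/sync package rather than confirmed by this diff; real values come from controller-manager configuration.

package example

import (
    "time"

    "k8s.io/kubernetes/pkg/controller/node/ipam"
    nodesync "k8s.io/kubernetes/pkg/controller/node/ipam/sync"
)

// newIPAMConfig shows one plausible way to fill in ipam.Config.
func newIPAMConfig() *ipam.Config {
    return &ipam.Config{
        Resync:       30 * time.Second, // steady-state resync period (assumed value)
        MaxBackoff:   2 * time.Minute,  // ceiling for error backoff (assumed value)
        InitialRetry: 5 * time.Second,  // first retry after an error (assumed value)
        Mode:         nodesync.SyncFromCluster, // constant name assumed from ipam/sync
    }
}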


@@ -0,0 +1,67 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"net"
"testing"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/node/ipam/test"
)
func TestOccupyServiceCIDR(t *testing.T) {
const clusterCIDR = "10.1.0.0/16"
TestCase:
for _, tc := range []struct {
serviceCIDR string
}{
{"10.0.255.0/24"},
{"10.1.0.0/24"},
{"10.1.255.0/24"},
{"10.2.0.0/24"},
} {
serviceCIDR := test.MustParseCIDR(tc.serviceCIDR)
set, err := cidrset.NewCIDRSet(test.MustParseCIDR(clusterCIDR), 24)
if err != nil {
t.Errorf("test case %+v: NewCIDRSet() = %v, want nil", tc, err)
}
if err := occupyServiceCIDR(set, test.MustParseCIDR(clusterCIDR), serviceCIDR); err != nil {
t.Errorf("test case %+v: occupyServiceCIDR() = %v, want nil", tc, err)
}
// Allocate until full.
var cidrs []*net.IPNet
for {
cidr, err := set.AllocateNext()
if err != nil {
if err == cidrset.ErrCIDRRangeNoCIDRsRemaining {
break
}
t.Errorf("set.AllocateNext() = %v, want %v", err, cidrset.ErrCIDRRangeNoCIDRsRemaining)
continue TestCase
}
cidrs = append(cidrs, cidr)
}
// No allocated CIDR range should intersect with serviceCIDR.
for _, c := range cidrs {
if c.Contains(serviceCIDR.IP) || serviceCIDR.Contains(c.IP) {
t.Errorf("test case %+v: allocated CIDR %v from service range", tc, c)
}
}
}
}


@ -0,0 +1,30 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package ipam provides different allocators for assigning IP ranges to nodes.
// We currently support several kinds of IPAM allocators (these are denoted by
// the CIDRAllocatorType):
// - RangeAllocator is an allocator that assigns PodCIDRs to nodes and works
// in conjunction with the RouteController to configure the network to get
// connectivity.
// - CloudAllocator is an allocator that synchronizes PodCIDRs from IP
//   range assignments made by the underlying cloud platform.
// - (Alpha only) IPAMFromCluster is an allocator that has similar
//   functionality to the RangeAllocator but also synchronizes cluster-managed
//   ranges into the cloud platform.
// - (Alpha only) IPAMFromCloud is the same as CloudAllocator (synchronizes
//   from the cloud into the cluster).
package ipam


@ -0,0 +1,325 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"fmt"
"net"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
informers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/node/util"
nodeutil "k8s.io/kubernetes/pkg/util/node"
)
type rangeAllocator struct {
client clientset.Interface
cidrs *cidrset.CidrSet
clusterCIDR *net.IPNet
maxCIDRs int
// nodeLister is able to list/get nodes and is populated by the shared informer passed to
// NewCloudCIDRAllocator.
nodeLister corelisters.NodeLister
// nodesSynced returns true if the node shared informer has been synced at least once.
nodesSynced cache.InformerSynced
// Channel that is used to pass updates of Nodes with assigned CIDRs to the background
// workers. This increases the throughput of CIDR assignment by not blocking on long operations.
nodeCIDRUpdateChannel chan nodeAndCIDR
recorder record.EventRecorder
// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation
lock sync.Mutex
nodesInProcessing sets.String
}
// NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node
// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
// Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing.
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
if client == nil {
glog.Fatalf("kubeClient is nil when starting NodeController")
}
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cidrAllocator"})
eventBroadcaster.StartLogging(glog.Infof)
glog.V(0).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")})
set, err := cidrset.NewCIDRSet(clusterCIDR, subNetMaskSize)
if err != nil {
return nil, err
}
ra := &rangeAllocator{
client: client,
cidrs: set,
clusterCIDR: clusterCIDR,
nodeLister: nodeInformer.Lister(),
nodesSynced: nodeInformer.Informer().HasSynced,
nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
recorder: recorder,
nodesInProcessing: sets.NewString(),
}
if serviceCIDR != nil {
ra.filterOutServiceRange(serviceCIDR)
} else {
glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.")
}
if nodeList != nil {
for _, node := range nodeList.Items {
if node.Spec.PodCIDR == "" {
glog.Infof("Node %v has no CIDR, ignoring", node.Name)
continue
} else {
glog.Infof("Node %v has CIDR %s, occupying it in CIDR map",
node.Name, node.Spec.PodCIDR)
}
if err := ra.occupyCIDR(&node); err != nil {
// This will happen if:
// 1. We find garbage in the podCIDR field. Retrying is useless.
// 2. CIDR out of range: this means a node CIDR has changed;
//    this error will keep crashing the controller-manager.
return nil, err
}
}
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDR is not empty we either:
// - already processed a Node that already had a CIDR after NC restarted
// (cidr is marked as used),
// - already processed a Node successfully and allocated a CIDR for it
// (cidr is marked as used),
// - already processed a Node but saw a "timeout" response and the
//   request eventually got through; in this case we haven't released
//   the allocated CIDR (cidr is still marked as used).
// There's a possible error here:
// - NC sees a new Node and assigns a CIDR X to it,
// - Update Node call fails with a timeout,
// - Node is updated by some other component, NC sees an update and
// assigns CIDR Y to the Node,
// - Both CIDR X and CIDR Y are marked as used in the local cache,
//   even though the Node sees only CIDR Y.
// The problem here is that in the in-memory cache we see CIDR X as marked,
// which prevents it from being assigned to any new node. The cluster
// state is correct.
// Restart of NC fixes the issue.
if newNode.Spec.PodCIDR == "" {
return ra.AllocateOrOccupyCIDR(newNode)
}
return nil
}),
DeleteFunc: util.CreateDeleteNodeHandler(ra.ReleaseCIDR),
})
return ra, nil
}
func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
glog.Infof("Starting range CIDR allocator")
defer glog.Infof("Shutting down range CIDR allocator")
if !controller.WaitForCacheSync("cidrallocator", stopCh, r.nodesSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go r.worker(stopCh)
}
<-stopCh
}
func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
for {
select {
case workItem, ok := <-r.nodeCIDRUpdateChannel:
if !ok {
glog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return
}
if err := r.updateCIDRAllocation(workItem); err != nil {
// Requeue the failed node for update again.
r.nodeCIDRUpdateChannel <- workItem
}
case <-stopChan:
return
}
}
}
func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool {
r.lock.Lock()
defer r.lock.Unlock()
if r.nodesInProcessing.Has(nodeName) {
return false
}
r.nodesInProcessing.Insert(nodeName)
return true
}
func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) {
r.lock.Lock()
defer r.lock.Unlock()
r.nodesInProcessing.Delete(nodeName)
}
func (r *rangeAllocator) occupyCIDR(node *v1.Node) error {
defer r.removeNodeFromProcessing(node.Name)
if node.Spec.PodCIDR == "" {
return nil
}
_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
if err != nil {
return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR)
}
if err := r.cidrs.Occupy(podCIDR); err != nil {
return fmt.Errorf("failed to mark cidr as occupied: %v", err)
}
return nil
}
// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
if node == nil {
return nil
}
if !r.insertNodeToProcessing(node.Name) {
glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
return nil
}
if node.Spec.PodCIDR != "" {
return r.occupyCIDR(node)
}
podCIDR, err := r.cidrs.AllocateNext()
if err != nil {
r.removeNodeFromProcessing(node.Name)
util.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: %v", err)
}
glog.V(4).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR)
r.nodeCIDRUpdateChannel <- nodeAndCIDR{
nodeName: node.Name,
cidr: podCIDR,
}
return nil
}
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
if node == nil || node.Spec.PodCIDR == "" {
return nil
}
_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
if err != nil {
return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err)
}
glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR)
if err = r.cidrs.Release(podCIDR); err != nil {
return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err)
}
return err
}
// filterOutServiceRange marks all CIDRs with subNetMaskSize that belong to
// serviceCIDR as used, so that they won't be assignable.
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
// clusterCIDR's Mask applied (this means that clusterCIDR contains
// serviceCIDR) or vice versa (which means that serviceCIDR contains
// clusterCIDR).
if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) {
return
}
if err := r.cidrs.Occupy(serviceCIDR); err != nil {
glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err)
}
}
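// Illustrative sketch (hypothetical helper with made-up ranges): the masking
// check above detects intersection in either direction. For a 10.1.0.0/16
// cluster CIDR and a 10.0.0.0/8 service CIDR, only the second test fires,
// which is enough to conclude that the ranges overlap.
func exampleRangesIntersect() bool {
	_, cluster, _ := net.ParseCIDR("10.1.0.0/16")
	_, service, _ := net.ParseCIDR("10.0.0.0/8")
	// 10.0.0.0 masked with /16 is 10.0.0.0, which lies outside 10.1.0.0/16.
	clusterHoldsService := cluster.Contains(service.IP.Mask(cluster.Mask)) // false
	// 10.1.0.0 masked with /8 is 10.0.0.0, which lies inside 10.0.0.0/8, so
	// the service range contains the cluster range.
	serviceHoldsCluster := service.Contains(cluster.IP.Mask(service.Mask)) // true
	return clusterHoldsService || serviceHoldsCluster // true: occupy the service range
}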
// updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
var err error
var node *v1.Node
defer r.removeNodeFromProcessing(data.nodeName)
podCIDR := data.cidr.String()
for rep := 0; rep < cidrUpdateRetries; rep++ {
node, err = r.nodeLister.Get(data.nodeName)
if err != nil {
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
continue
}
if node.Spec.PodCIDR != "" {
glog.V(4).Infof("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR)
if node.Spec.PodCIDR != podCIDR {
glog.Errorf("Node %q PodCIDR seems to have changed (original=%v, current=%v), releasing original and occupying new CIDR",
node.Name, node.Spec.PodCIDR, podCIDR)
if err := r.cidrs.Release(data.cidr); err != nil {
glog.Errorf("Error when releasing CIDR %v", podCIDR)
}
}
return nil
}
if err = nodeutil.PatchNodeCIDR(r.client, types.NodeName(node.Name), podCIDR); err == nil {
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
break
}
glog.Errorf("Failed to update node %v PodCIDR to %v (%d retries left): %v", node.Name, podCIDR, cidrUpdateRetries-rep-1, err)
}
if err != nil {
util.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing
// them when we don't know whether the request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) {
glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
if releaseErr := r.cidrs.Release(data.cidr); releaseErr != nil {
glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
}
}
}
return err
}


@ -0,0 +1,432 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"net"
"testing"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/testutil"
)
const (
nodePollInterval = 100 * time.Millisecond
)
var alwaysReady = func() bool { return true }
func waitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error {
return wait.Poll(nodePollInterval, timeout, func() (bool, error) {
if len(nodeHandler.GetUpdatedNodesCopy()) >= number {
return true, nil
}
return false, nil
})
}
// Creates a fakeNodeInformer using the provided fakeNodeHandler.
func getFakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes()
for _, node := range fakeNodeHandler.Existing {
fakeNodeInformer.Informer().GetStore().Add(node)
}
return fakeNodeInformer
}
func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDR string
allocatedCIDRs []string
}{
{
description: "When there's no ServiceCIDR return first CIDR in range",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
expectedAllocatedCIDR: "127.123.234.0/30",
},
{
description: "Correctly filter out ServiceCIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/26")
return clusterCIDR
}(),
subNetMaskSize: 30,
// it should return first /30 CIDR after service range
expectedAllocatedCIDR: "127.123.234.64/30",
},
{
description: "Correctly ignore already allocated CIDRs",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/26")
return clusterCIDR
}(),
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.64/30", "127.123.234.68/30", "127.123.234.72/30", "127.123.234.80/30"},
expectedAllocatedCIDR: "127.123.234.76/30",
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDR string
allocatedCIDRs []string
}) {
// Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
rangeAllocator.nodesSynced = alwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
if err = rangeAllocator.cidrs.Occupy(cidr); err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
found := false
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
if updatedNode.Spec.PodCIDR == tc.expectedAllocatedCIDR {
found = true
break
}
}
if !found {
t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v",
tc.description, tc.expectedAllocatedCIDR, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}
func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
allocatedCIDRs []string
}{
{
description: "Fail to allocate a CIDR when the range is exhausted",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"},
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
allocatedCIDRs []string
}) {
// Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
rangeAllocator.nodesSynced = alwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
err = rangeAllocator.cidrs.Occupy(cidr)
if err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
time.Sleep(time.Second)
if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 {
t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy())
}
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
if updatedNode.Spec.PodCIDR != "" {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
}
}
if len(seenCIDRs) != 0 {
t.Errorf("%v: Seen assigned CIDRs when not expected: %v",
tc.description, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}
func TestReleaseCIDRSuccess(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDRFirstRound string
expectedAllocatedCIDRSecondRound string
allocatedCIDRs []string
cidrsToRelease []string
}{
{
description: "Correctly release preallocated CIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"},
expectedAllocatedCIDRFirstRound: "",
cidrsToRelease: []string{"127.123.234.4/30"},
expectedAllocatedCIDRSecondRound: "127.123.234.4/30",
},
{
description: "Correctly recycle CIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"},
expectedAllocatedCIDRFirstRound: "127.123.234.0/30",
cidrsToRelease: []string{"127.123.234.0/30"},
expectedAllocatedCIDRSecondRound: "127.123.234.0/30",
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDRFirstRound string
expectedAllocatedCIDRSecondRound string
allocatedCIDRs []string
cidrsToRelease []string
}) {
// Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
rangeAllocator.nodesSynced = alwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
err = rangeAllocator.cidrs.Occupy(cidr)
if err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
if tc.expectedAllocatedCIDRFirstRound != "" {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
} else {
if err == nil {
t.Fatalf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates here
time.Sleep(time.Second)
if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 {
t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy())
}
}
for _, cidrToRelease := range tc.cidrsToRelease {
nodeToRelease := v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
}
nodeToRelease.Spec.PodCIDR = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
found := false
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
if updatedNode.Spec.PodCIDR == tc.expectedAllocatedCIDRSecondRound {
found = true
break
}
}
if !found {
t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v",
tc.description, tc.expectedAllocatedCIDRSecondRound, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}


@ -0,0 +1,41 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["sync.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam/sync",
visibility = ["//visibility:public"],
deps = [
"//pkg/controller/node/ipam/cidrset:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["sync_test.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam/sync",
library = ":go_default_library",
deps = [
"//pkg/controller/node/ipam/cidrset:go_default_library",
"//pkg/controller/node/ipam/test:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -0,0 +1,381 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sync
import (
"context"
"fmt"
"net"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
)
const (
// InvalidPodCIDR is the event recorded when a node is found with an
// invalid PodCIDR.
InvalidPodCIDR = "CloudCIDRAllocatorInvalidPodCIDR"
// InvalidModeEvent is the event recorded when the CIDR range cannot be
// synchronized because the cluster is running in the wrong mode.
InvalidModeEvent = "CloudCIDRAllocatorInvalidMode"
// MismatchEvent is the event recorded when the CIDR range allocated in the
// node spec does not match what has been allocated in the cloud.
MismatchEvent = "CloudCIDRAllocatorMismatch"
)
// cloudAlias is the interface to the cloud platform APIs.
type cloudAlias interface {
// Alias returns the IP alias for the node.
Alias(ctx context.Context, nodeName string) (*net.IPNet, error)
// AddAlias adds an alias to the node.
AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error
}
// kubeAPI is the interface to the Kubernetes APIs.
type kubeAPI interface {
// Node returns the spec for the Node object.
Node(ctx context.Context, name string) (*v1.Node, error)
// UpdateNodePodCIDR updates the PodCIDR in the Node spec.
UpdateNodePodCIDR(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error
// UpdateNodeNetworkUnavailable updates the network unavailable status for the node.
UpdateNodeNetworkUnavailable(nodeName string, unavailable bool) error
// EmitNodeWarningEvent emits an event for the given node.
EmitNodeWarningEvent(nodeName, reason, fmt string, args ...interface{})
}
// controller is the interface to the controller.
type controller interface {
// ReportResult updates the controller with the result of the latest
// sync operation.
ReportResult(err error)
// ResyncTimeout returns the amount of time to wait before retrying
// a sync with a node.
ResyncTimeout() time.Duration
}
// NodeSyncMode is the mode the cloud CIDR allocator runs in.
type NodeSyncMode string
var (
// SyncFromCloud is the mode that synchronizes the IP allocation from the cloud
// platform to the node.
SyncFromCloud NodeSyncMode = "SyncFromCloud"
// SyncFromCluster is the mode that synchronizes the IP allocation determined
// by the k8s controller to the cloud provider.
SyncFromCluster NodeSyncMode = "SyncFromCluster"
)
// IsValidMode returns true if the given mode is valid.
func IsValidMode(m NodeSyncMode) bool {
switch m {
case SyncFromCloud:
case SyncFromCluster:
default:
return false
}
return true
}
// NodeSync synchronizes the state for a single node in the cluster.
type NodeSync struct {
c controller
cloudAlias cloudAlias
kubeAPI kubeAPI
mode NodeSyncMode
nodeName string
opChan chan syncOp
set *cidrset.CidrSet
}
// New returns a new syncer for a given node.
func New(c controller, cloudAlias cloudAlias, kubeAPI kubeAPI, mode NodeSyncMode, nodeName string, set *cidrset.CidrSet) *NodeSync {
return &NodeSync{
c: c,
cloudAlias: cloudAlias,
kubeAPI: kubeAPI,
mode: mode,
nodeName: nodeName,
opChan: make(chan syncOp, 1),
set: set,
}
}
// Loop runs the sync loop for a given node. done is an optional channel that
// is closed when the Loop() returns.
func (sync *NodeSync) Loop(done chan struct{}) {
glog.V(2).Infof("Starting sync loop for node %q", sync.nodeName)
defer func() {
if done != nil {
close(done)
}
}()
timeout := sync.c.ResyncTimeout()
delayTimer := time.NewTimer(timeout)
glog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
for {
select {
case op, more := <-sync.opChan:
if !more {
glog.V(2).Infof("Stopping sync loop")
return
}
sync.c.ReportResult(op.run(sync))
if !delayTimer.Stop() {
<-delayTimer.C
}
case <-delayTimer.C:
glog.V(4).Infof("Running resync for node %q", sync.nodeName)
sync.c.ReportResult((&updateOp{}).run(sync))
}
timeout := sync.c.ResyncTimeout()
delayTimer.Reset(timeout)
glog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
}
}
// Update causes an update operation on the given node. If node is nil, then
// the syncer will fetch the node spec from the API server before syncing.
//
// This method is safe to call from multiple goroutines.
func (sync *NodeSync) Update(node *v1.Node) {
sync.opChan <- &updateOp{node}
}
// Delete performs the sync operations necessary to remove the node from the
// IPAM state.
//
// This method is safe to call from multiple goroutines.
func (sync *NodeSync) Delete(node *v1.Node) {
sync.opChan <- &deleteOp{node}
close(sync.opChan)
}
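// Illustrative sketch (hypothetical function): how a caller drives a NodeSync.
// ctrl, cloud and kube stand in for implementations of the controller,
// cloudAlias and kubeAPI interfaces above; the node controller itself passes a
// nodeState (for backoff) and its adapter for the cloud and kube sides.
func exampleRunSyncer(ctrl controller, cloud cloudAlias, kube kubeAPI, set *cidrset.CidrSet, node *v1.Node) {
	s := New(ctrl, cloud, kube, SyncFromCluster, node.Name, set)
	done := make(chan struct{})
	go s.Loop(done) // Loop exits, and closes done, once the op channel is closed.
	s.Update(node)  // enqueue a sync for this node
	s.Delete(node)  // release the node's range and close the op channel
	<-done
}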
// syncOp is the interface for generic sync operation.
type syncOp interface {
// run the requested sync operation.
run(sync *NodeSync) error
}
// updateOp handles creation and updates of a node.
type updateOp struct {
node *v1.Node
}
func (op *updateOp) String() string {
if op.node == nil {
return fmt.Sprintf("updateOp(nil)")
}
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}
func (op *updateOp) run(sync *NodeSync) error {
glog.V(3).Infof("Running updateOp %+v", op)
ctx := context.Background()
if op.node == nil {
glog.V(3).Infof("Getting node spec for %q", sync.nodeName)
node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
if err != nil {
glog.Errorf("Error getting node %q spec: %v", sync.nodeName, err)
return err
}
op.node = node
}
aliasRange, err := sync.cloudAlias.Alias(ctx, sync.nodeName)
if err != nil {
glog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
return err
}
switch {
case op.node.Spec.PodCIDR == "" && aliasRange == nil:
err = op.allocateRange(ctx, sync, op.node)
case op.node.Spec.PodCIDR == "" && aliasRange != nil:
err = op.updateNodeFromAlias(ctx, sync, op.node, aliasRange)
case op.node.Spec.PodCIDR != "" && aliasRange == nil:
err = op.updateAliasFromNode(ctx, sync, op.node)
case op.node.Spec.PodCIDR != "" && aliasRange != nil:
err = op.validateRange(ctx, sync, op.node, aliasRange)
}
return err
}
// validateRange checks that the allocated range and the alias range
// match.
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
if node.Spec.PodCIDR != aliasRange.String() {
glog.Errorf("Inconsistency detected between node PodCIDR and node alias (%v != %v)",
node.Spec.PodCIDR, aliasRange)
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
// User intervention is required in this case, as this is most likely due
// to the user mucking around with their VM aliases on the side.
} else {
glog.V(4).Infof("Node %q CIDR range %v is matches cloud assignment", node.Name, node.Spec.PodCIDR)
}
return nil
}
// updateNodeFromAlias updates the node from the cloud-allocated
// alias.
func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
if sync.mode != SyncFromCloud {
sync.kubeAPI.EmitNodeWarningEvent(node.Name, InvalidModeEvent,
"Cannot sync from cloud in mode %q", sync.mode)
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
}
glog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange)
if err := sync.set.Occupy(aliasRange); err != nil {
glog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
return err
}
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
glog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, aliasRange, err)
return err
}
glog.V(2).Infof("Node %q PodCIDR set to %v", node.Name, aliasRange)
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
glog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
return err
}
glog.V(2).Infof("Updated node %q PodCIDR from cloud alias %v", node.Name, aliasRange)
return nil
}
// updateAliasFromNode updates the cloud alias given the node allocation.
func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, node *v1.Node) error {
if sync.mode != SyncFromCluster {
sync.kubeAPI.EmitNodeWarningEvent(
node.Name, InvalidModeEvent, "Cannot sync to cloud in mode %q", sync.mode)
return fmt.Errorf("cannot sync to cloud in mode %q", sync.mode)
}
_, aliasRange, err := net.ParseCIDR(node.Spec.PodCIDR)
if err != nil {
glog.Errorf("Could not parse PodCIDR (%q) for node %q: %v",
node.Spec.PodCIDR, node.Name, err)
return err
}
if err := sync.set.Occupy(aliasRange); err != nil {
glog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
return err
}
if err := sync.cloudAlias.AddAlias(ctx, node.Name, aliasRange); err != nil {
glog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
return err
}
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
glog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
return err
}
glog.V(2).Infof("Updated node %q cloud alias with node spec, node.PodCIDR = %v",
node.Name, node.Spec.PodCIDR)
return nil
}
// allocateRange allocates a new range and updates both the cloud
// platform and the node allocation.
func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.Node) error {
if sync.mode != SyncFromCluster {
sync.kubeAPI.EmitNodeWarningEvent(node.Name, InvalidModeEvent,
"Cannot allocate CIDRs in mode %q", sync.mode)
return fmt.Errorf("controller cannot allocate CIDRS in mode %q", sync.mode)
}
cidrRange, err := sync.set.AllocateNext()
if err != nil {
return err
}
// If AddAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be
// recovered on the next restart of the controller.
if err := sync.cloudAlias.AddAlias(ctx, node.Name, cidrRange); err != nil {
glog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
return err
}
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
glog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, cidrRange, err)
return err
}
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
glog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
return err
}
glog.V(2).Infof("Allocated PodCIDR %v for node %q", cidrRange, node.Name)
return nil
}
// deleteOp handles deletion of a node.
type deleteOp struct {
node *v1.Node
}
func (op *deleteOp) String() string {
if op.node == nil {
return fmt.Sprintf("deleteOp(nil)")
}
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}
func (op *deleteOp) run(sync *NodeSync) error {
glog.V(3).Infof("Running deleteOp %+v", op)
if op.node.Spec.PodCIDR == "" {
glog.V(2).Infof("Node %q was deleted, node had no PodCIDR range assigned", op.node.Name)
return nil
}
_, cidrRange, err := net.ParseCIDR(op.node.Spec.PodCIDR)
if err != nil {
glog.Errorf("Deleted node %q has an invalid podCIDR %q: %v",
op.node.Name, op.node.Spec.PodCIDR, err)
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
return nil
}
sync.set.Release(cidrRange)
glog.V(2).Infof("Node %q was deleted, releasing CIDR range %v",
op.node.Name, op.node.Spec.PodCIDR)
return nil
}


@ -0,0 +1,297 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sync
import (
"context"
"fmt"
"net"
"reflect"
"testing"
"time"
"github.com/golang/glog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/node/ipam/test"
"k8s.io/api/core/v1"
)
var (
_, clusterCIDRRange, _ = net.ParseCIDR("10.1.0.0/16")
)
type fakeEvent struct {
nodeName string
reason string
}
type fakeAPIs struct {
aliasRange *net.IPNet
aliasErr error
addAliasErr error
nodeRet *v1.Node
nodeErr error
updateNodeErr error
resyncTimeout time.Duration
reportChan chan struct{}
updateNodeNetworkUnavailableErr error
calls []string
events []fakeEvent
results []error
}
func (f *fakeAPIs) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
f.calls = append(f.calls, fmt.Sprintf("alias %v", nodeName))
return f.aliasRange, f.aliasErr
}
func (f *fakeAPIs) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", nodeName, cidrRange))
return f.addAliasErr
}
func (f *fakeAPIs) Node(ctx context.Context, name string) (*v1.Node, error) {
f.calls = append(f.calls, fmt.Sprintf("node %v", name))
return f.nodeRet, f.nodeErr
}
func (f *fakeAPIs) UpdateNodePodCIDR(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("updateNode %v", node))
return f.updateNodeErr
}
func (f *fakeAPIs) UpdateNodeNetworkUnavailable(nodeName string, unavailable bool) error {
f.calls = append(f.calls, fmt.Sprintf("updateNodeNetworkUnavailable %v %v", nodeName, unavailable))
return f.updateNodeNetworkUnavailableErr
}
func (f *fakeAPIs) EmitNodeWarningEvent(nodeName, reason, fmtStr string, args ...interface{}) {
f.events = append(f.events, fakeEvent{nodeName, reason})
}
func (f *fakeAPIs) ReportResult(err error) {
glog.V(2).Infof("ReportResult %v", err)
f.results = append(f.results, err)
if f.reportChan != nil {
f.reportChan <- struct{}{}
}
}
func (f *fakeAPIs) ResyncTimeout() time.Duration {
if f.resyncTimeout == 0 {
return time.Second * 10000
}
return f.resyncTimeout
}
func (f *fakeAPIs) dumpTrace() {
for i, x := range f.calls {
glog.Infof("trace %v: %v", i, x)
}
}
var nodeWithoutCIDRRange = &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node1"},
}
var nodeWithCIDRRange = &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node1"},
Spec: v1.NodeSpec{PodCIDR: "10.1.1.0/24"},
}
func TestNodeSyncUpdate(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
desc string
mode NodeSyncMode
node *v1.Node
fake fakeAPIs
events []fakeEvent
wantError bool
}{
{
desc: "validate range ==",
mode: SyncFromCloud,
node: nodeWithCIDRRange,
fake: fakeAPIs{
aliasRange: test.MustParseCIDR(nodeWithCIDRRange.Spec.PodCIDR),
},
},
{
desc: "validate range !=",
mode: SyncFromCloud,
node: nodeWithCIDRRange,
fake: fakeAPIs{aliasRange: test.MustParseCIDR("192.168.0.0/24")},
events: []fakeEvent{{"node1", "CloudCIDRAllocatorMismatch"}},
},
{
desc: "update alias from node",
mode: SyncFromCloud,
node: nodeWithCIDRRange,
events: []fakeEvent{{"node1", "CloudCIDRAllocatorInvalidMode"}},
wantError: true,
},
{
desc: "update alias from node",
mode: SyncFromCluster,
node: nodeWithCIDRRange,
// XXX/bowei -- validation
},
{
desc: "update node from alias",
mode: SyncFromCloud,
node: nodeWithoutCIDRRange,
fake: fakeAPIs{aliasRange: test.MustParseCIDR("10.1.2.3/16")},
// XXX/bowei -- validation
},
{
desc: "update node from alias",
mode: SyncFromCluster,
node: nodeWithoutCIDRRange,
fake: fakeAPIs{aliasRange: test.MustParseCIDR("10.1.2.3/16")},
events: []fakeEvent{{"node1", "CloudCIDRAllocatorInvalidMode"}},
wantError: true,
},
{
desc: "allocate range",
mode: SyncFromCloud,
node: nodeWithoutCIDRRange,
events: []fakeEvent{{"node1", "CloudCIDRAllocatorInvalidMode"}},
wantError: true,
},
{
desc: "allocate range",
mode: SyncFromCluster,
node: nodeWithoutCIDRRange,
},
{
desc: "update with node==nil",
mode: SyncFromCluster,
node: nil,
fake: fakeAPIs{
nodeRet: nodeWithCIDRRange,
},
wantError: false,
},
} {
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
sync := New(&tc.fake, &tc.fake, &tc.fake, tc.mode, "node1", cidr)
doneChan := make(chan struct{})
// Do a single step of the loop.
go sync.Loop(doneChan)
sync.Update(tc.node)
close(sync.opChan)
<-doneChan
tc.fake.dumpTrace()
if !reflect.DeepEqual(tc.fake.events, tc.events) {
t.Errorf("%v, %v; fake.events = %#v, want %#v", tc.desc, tc.mode, tc.fake.events, tc.events)
}
var hasError bool
for _, r := range tc.fake.results {
hasError = hasError || (r != nil)
}
if hasError != tc.wantError {
t.Errorf("%v, %v; hasError = %t, errors = %v, want %t",
tc.desc, tc.mode, hasError, tc.fake.events, tc.wantError)
}
}
}
func TestNodeSyncResync(t *testing.T) {
fake := &fakeAPIs{
nodeRet: nodeWithCIDRRange,
resyncTimeout: time.Millisecond,
reportChan: make(chan struct{}),
}
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
doneChan := make(chan struct{})
go sync.Loop(doneChan)
<-fake.reportChan
close(sync.opChan)
// Unblock loop().
go func() {
<-fake.reportChan
}()
<-doneChan
fake.dumpTrace()
}
func TestNodeSyncDelete(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
desc string
mode NodeSyncMode
node *v1.Node
fake fakeAPIs
}{
{
desc: "delete",
mode: SyncFromCluster,
node: nodeWithCIDRRange,
},
{
desc: "delete without CIDR range",
mode: SyncFromCluster,
node: nodeWithoutCIDRRange,
},
{
desc: "delete with invalid CIDR range",
mode: SyncFromCluster,
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node1"},
Spec: v1.NodeSpec{PodCIDR: "invalid"},
},
},
} {
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
sync := New(&tc.fake, &tc.fake, &tc.fake, tc.mode, "node1", cidr)
doneChan := make(chan struct{})
// Do a single step of the loop.
go sync.Loop(doneChan)
sync.Delete(tc.node)
<-doneChan
tc.fake.dumpTrace()
/*
if !reflect.DeepEqual(tc.fake.events, tc.events) {
t.Errorf("%v, %v; fake.events = %#v, want %#v", tc.desc, tc.mode, tc.fake.events, tc.events)
}
var hasError bool
for _, r := range tc.fake.results {
hasError = hasError || (r != nil)
}
if hasError != tc.wantError {
t.Errorf("%v, %v; hasError = %t, errors = %v, want %t",
tc.desc, tc.mode, hasError, tc.fake.events, tc.wantError)
}
*/
}
}


@ -0,0 +1,22 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["utils.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/ipam/test",
visibility = ["//visibility:public"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -0,0 +1,31 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package test
import (
"net"
)
// MustParseCIDR returns the CIDR range parsed from s or panics if the string
// cannot be parsed.
func MustParseCIDR(s string) *net.IPNet {
_, ret, err := net.ParseCIDR(s)
if err != nil {
panic(err)
}
return ret
}


@ -0,0 +1,67 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"time"
)
// Timeout manages the resync loop timing for a given node sync operation. The
// timeout changes depending on whether or not there was an error reported for
// the operation. Consecutive errors result in exponential backoff, capped at
// MaxBackoff.
type Timeout struct {
// Resync is the default timeout duration when there are no errors.
Resync time.Duration
// MaxBackoff is the maximum timeout when in a error backoff state.
MaxBackoff time.Duration
// InitialRetry is the initial retry interval when an error is reported.
InitialRetry time.Duration
// errs is the count of consecutive errors that have occurred.
errs int
// current is the current backoff timeout.
current time.Duration
}
// Update the timeout with the current error state.
func (b *Timeout) Update(ok bool) {
if ok {
b.errs = 0
b.current = b.Resync
return
}
b.errs++
if b.errs == 1 {
b.current = b.InitialRetry
return
}
b.current *= 2
if b.current >= b.MaxBackoff {
b.current = b.MaxBackoff
}
}
// Next returns the next operation timeout given the current error state.
func (b *Timeout) Next() time.Duration {
if b.errs == 0 {
return b.Resync
}
return b.current
}
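// Illustrative sketch (hypothetical helper; the durations match the test in
// the next file): successes return Resync, the first error returns
// InitialRetry, further errors double the backoff up to MaxBackoff, and a
// success resets the cycle.
func exampleBackoff() []time.Duration {
	t := &Timeout{Resync: 10 * time.Second, MaxBackoff: 5 * time.Second, InitialRetry: time.Second}
	var timeouts []time.Duration
	for _, ok := range []bool{true, false, false, false, false, true} {
		t.Update(ok)
		timeouts = append(timeouts, t.Next())
	}
	return timeouts // [10s 1s 2s 4s 5s 10s]
}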


@ -0,0 +1,57 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"errors"
"testing"
"time"
)
func TestTimeout(t *testing.T) {
time10s := 10 * time.Second
time5s := 5 * time.Second
timeout := &Timeout{
Resync: time10s,
MaxBackoff: time5s,
InitialRetry: time.Second,
}
for _, testStep := range []struct {
err error
want time.Duration
}{
{nil, time10s},
{nil, time10s},
{errors.New("x"), time.Second},
{errors.New("x"), 2 * time.Second},
{errors.New("x"), 4 * time.Second},
{errors.New("x"), 5 * time.Second},
{errors.New("x"), 5 * time.Second},
{nil, time10s},
{nil, time10s},
{errors.New("x"), time.Second},
{errors.New("x"), 2 * time.Second},
{nil, time10s},
} {
timeout.Update(testStep.err == nil)
next := timeout.Next()
if next != testStep.want {
t.Errorf("timeout.next(%v) = %v, want %v", testStep.err, next, testStep.want)
}
}
}


@ -0,0 +1,78 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
const (
nodeControllerSubsystem = "node_collector"
zoneHealthStatisticKey = "zone_health"
zoneSizeKey = "zone_size"
zoneNoUnhealthyNodesKey = "unhealthy_nodes_in_zone"
evictionsNumberKey = "evictions_number"
)
var (
zoneHealth = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: nodeControllerSubsystem,
Name: zoneHealthStatisticKey,
Help: "Gauge measuring percentage of healthy nodes per zone.",
},
[]string{"zone"},
)
zoneSize = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: nodeControllerSubsystem,
Name: zoneSizeKey,
Help: "Gauge measuring number of registered Nodes per zone.",
},
[]string{"zone"},
)
unhealthyNodes = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: nodeControllerSubsystem,
Name: zoneNoUnhealthyNodesKey,
Help: "Gauge measuring number of not-Ready Nodes per zone.",
},
[]string{"zone"},
)
evictionsNumber = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: nodeControllerSubsystem,
Name: evictionsNumberKey,
Help: "Number of Node evictions that happened since the current instance of NodeController started.",
},
[]string{"zone"},
)
)
var registerMetrics sync.Once
// Register the metrics that are to be monitored.
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(zoneHealth)
prometheus.MustRegister(zoneSize)
prometheus.MustRegister(unhealthyNodes)
prometheus.MustRegister(evictionsNumber)
})
}
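// Illustrative sketch (hypothetical helper and values): Register is guarded by
// sync.Once, so callers can invoke it freely and then update the vectors by
// zone label.
func exampleRecordZoneMetrics(zone string, healthyPercent float64, registered, notReady int) {
	Register()
	zoneHealth.WithLabelValues(zone).Set(healthyPercent)
	zoneSize.WithLabelValues(zone).Set(float64(registered))
	unhealthyNodes.WithLabelValues(zone).Set(float64(notReady))
	// An eviction in this zone would additionally bump the counter:
	evictionsNumber.WithLabelValues(zone).Inc()
}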

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,67 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"rate_limited_queue_test.go",
"taint_controller_test.go",
"timed_workers_test.go",
],
importpath = "k8s.io/kubernetes/pkg/controller/node/scheduler",
library = ":go_default_library",
deps = [
"//pkg/controller/testutil:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
"//vendor/k8s.io/client-go/testing:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"rate_limited_queue.go",
"taint_controller.go",
"timed_workers.go",
],
importpath = "k8s.io/kubernetes/pkg/controller/node/scheduler",
deps = [
"//pkg/apis/core/helper:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)


@ -0,0 +1,314 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"container/heap"
"sync"
"time"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol"
"github.com/golang/glog"
)
const (
// NodeStatusUpdateRetry controls the number of retries when writing a
// NodeStatus update.
NodeStatusUpdateRetry = 5
// NodeEvictionPeriod controls how often NodeController will try to
// evict Pods from non-responsive Nodes.
NodeEvictionPeriod = 100 * time.Millisecond
// EvictionRateLimiterBurst is the burst value for all eviction rate
// limiters.
EvictionRateLimiterBurst = 1
)
// TimedValue is a value that should be processed at a designated time.
type TimedValue struct {
Value string
// UID could be anything that helps identify the value
UID interface{}
AddedAt time.Time
ProcessAt time.Time
}
// now is aliased to time.Now so that tests can stub the current time.
var now = time.Now
// TimedQueue is a priority heap where the lowest ProcessAt is at the front of the queue
type TimedQueue []*TimedValue
// Len is the length of the queue.
func (h TimedQueue) Len() int { return len(h) }
// Less returns true if queue[i] < queue[j].
func (h TimedQueue) Less(i, j int) bool { return h[i].ProcessAt.Before(h[j].ProcessAt) }
// Swap swaps index i and j.
func (h TimedQueue) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
// Push pushes a new TimedValue onto the queue.
func (h *TimedQueue) Push(x interface{}) {
*h = append(*h, x.(*TimedValue))
}
// Pop the lowest ProcessAt item.
func (h *TimedQueue) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
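// Illustrative sketch (hypothetical helper): TimedQueue only implements
// heap.Interface; the ordering by ProcessAt takes effect when it is driven
// through container/heap, as UniqueQueue does below.
func exampleTimedQueueOrdering() string {
	q := TimedQueue{}
	heap.Push(&q, &TimedValue{Value: "later", ProcessAt: now().Add(time.Minute)})
	heap.Push(&q, &TimedValue{Value: "sooner", ProcessAt: now()})
	return heap.Pop(&q).(*TimedValue).Value // "sooner": the earliest ProcessAt pops first
}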
// UniqueQueue is a priority queue (ordered by ProcessAt) which additionally
// guarantees that any element can be added only once until it is removed.
type UniqueQueue struct {
lock sync.Mutex
queue TimedQueue
set sets.String
}
// Add adds a new value to the queue if it wasn't added before, or was
// explicitly removed by the Remove call. Returns true if a new value
// was added.
func (q *UniqueQueue) Add(value TimedValue) bool {
q.lock.Lock()
defer q.lock.Unlock()
if q.set.Has(value.Value) {
return false
}
heap.Push(&q.queue, &value)
q.set.Insert(value.Value)
return true
}
// Replace replaces an existing value in the queue if it already
// exists, otherwise it does nothing. Returns true if the item was
// found.
func (q *UniqueQueue) Replace(value TimedValue) bool {
q.lock.Lock()
defer q.lock.Unlock()
for i := range q.queue {
if q.queue[i].Value != value.Value {
continue
}
heap.Remove(&q.queue, i)
heap.Push(&q.queue, &value)
return true
}
return false
}
// RemoveFromQueue removes the value from the queue, but keeps it in the set,
// so it won't be added a second time. Returns true if something was
// removed.
func (q *UniqueQueue) RemoveFromQueue(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
return false
}
for i, val := range q.queue {
if val.Value == value {
heap.Remove(&q.queue, i)
return true
}
}
return false
}
// Remove removes the value from the queue, so a Get() call won't return
// it, and allows subsequent re-addition of the given value. If the value
// is not present, it does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
return false
}
q.set.Delete(value)
for i, val := range q.queue {
if val.Value == value {
heap.Remove(&q.queue, i)
return true
}
}
return true
}
// Get returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (TimedValue, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return TimedValue{}, false
}
result := heap.Pop(&q.queue).(*TimedValue)
q.set.Delete(result.Value)
return *result, true
}
// Head returns the oldest added value that wasn't returned yet
// without removing it.
func (q *UniqueQueue) Head() (TimedValue, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return TimedValue{}, false
}
result := q.queue[0]
return *result, true
}
// Clear removes all items from the queue and duplication preventing
// set.
func (q *UniqueQueue) Clear() {
q.lock.Lock()
defer q.lock.Unlock()
if q.queue.Len() > 0 {
q.queue = make(TimedQueue, 0)
}
if len(q.set) > 0 {
q.set = sets.NewString()
}
}
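// Illustrative sketch, not part of the upstream file: the set backing
// UniqueQueue deduplicates values until Remove is called, while
// RemoveFromQueue only drops the queued item and keeps blocking re-adds.
func exampleUniqueQueueDedup() {
	q := UniqueQueue{queue: TimedQueue{}, set: sets.NewString()}
	q.Add(TimedValue{Value: "node-a"}) // true: first add
	q.Add(TimedValue{Value: "node-a"}) // false: already in the set
	q.RemoveFromQueue("node-a")        // dropped from the heap, still in the set
	q.Add(TimedValue{Value: "node-a"}) // false: re-add blocked until Remove
	q.Remove("node-a")                 // clears the set entry as well
	q.Add(TimedValue{Value: "node-a"}) // true again
}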
// RateLimitedTimedQueue is a unique item priority queue ordered by
// the expected next time of execution. It is also rate limited.
type RateLimitedTimedQueue struct {
queue UniqueQueue
limiterLock sync.Mutex
limiter flowcontrol.RateLimiter
}
// NewRateLimitedTimedQueue creates a new queue which will use the given
// RateLimiter to oversee execution.
func NewRateLimitedTimedQueue(limiter flowcontrol.RateLimiter) *RateLimitedTimedQueue {
return &RateLimitedTimedQueue{
queue: UniqueQueue{
queue: TimedQueue{},
set: sets.NewString(),
},
limiter: limiter,
}
}
// ActionFunc takes a timed value and returns false if the item must
// be retried, with an optional time.Duration if some minimum wait
// interval should be used.
type ActionFunc func(TimedValue) (bool, time.Duration)
// Try processes the queue. It ends prematurely if the RateLimiter forbids an
// action and leak is true; otherwise the item is requeued to be processed.
// Each value is processed once if fn returns true, otherwise it is added back
// to the queue. The returned wait duration is used as the minimum time before
// the item is executed again. The same value is processed only once unless
// Remove is explicitly called on it (this is done by the cancelPodEviction
// function in NodeController when a Node becomes Ready again).
// TODO: figure out a good way to do garbage collection for all Nodes that
// were removed from the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
val, ok := q.queue.Head()
q.limiterLock.Lock()
defer q.limiterLock.Unlock()
for ok {
// rate limit the queue checking
if !q.limiter.TryAccept() {
glog.V(10).Infof("Try rate limited for value: %v", val)
// Try again later
break
}
now := now()
if now.Before(val.ProcessAt) {
break
}
if ok, wait := fn(val); !ok {
val.ProcessAt = now.Add(wait + 1)
q.queue.Replace(val)
} else {
q.queue.RemoveFromQueue(val.Value)
}
val, ok = q.queue.Head()
}
}
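// Illustrative sketch, not part of the upstream file: a caller drains the
// queue with Try, where the ActionFunc returns false plus a wait duration to
// reschedule an item. The 10 QPS value and names are arbitrary for the example.
func exampleTry(process func(string) bool) {
	q := NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(10, EvictionRateLimiterBurst))
	q.Add("node-a", "uid-a")
	q.Try(func(v TimedValue) (bool, time.Duration) {
		if process(v.Value) {
			return true, 0 // done; the value stays "seen" until Remove is called
		}
		return false, 5 * time.Second // retry no earlier than ~5s from now
	})
}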
// Add adds a value to the queue to be processed. Won't add the same
// value (comparison by value) a second time if it was already added
// and not removed.
func (q *RateLimitedTimedQueue) Add(value string, uid interface{}) bool {
now := now()
return q.queue.Add(TimedValue{
Value: value,
UID: uid,
AddedAt: now,
ProcessAt: now,
})
}
// Remove Node from the Evictor. The Node won't be processed until
// added again.
func (q *RateLimitedTimedQueue) Remove(value string) bool {
return q.queue.Remove(value)
}
// Clear removes all items from the queue
func (q *RateLimitedTimedQueue) Clear() {
q.queue.Clear()
}
// SwapLimiter safely swaps the current limiter for this queue with the
// passed one if their QPS values differ.
func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) {
q.limiterLock.Lock()
defer q.limiterLock.Unlock()
if q.limiter.QPS() == newQPS {
return
}
var newLimiter flowcontrol.RateLimiter
if newQPS <= 0 {
newLimiter = flowcontrol.NewFakeNeverRateLimiter()
} else {
newLimiter = flowcontrol.NewTokenBucketRateLimiter(newQPS, EvictionRateLimiterBurst)
}
// If we're currently waiting on limiter, we drain the new one - this is a good approach when Burst value is 1
// TODO: figure out if we need to support higher Burst values and decide on the drain logic, should we keep:
// - saturation (percentage of used tokens)
// - number of used tokens
// - number of available tokens
// - something else
for q.limiter.Saturation() > newLimiter.Saturation() {
// Check if we're not using fake limiter
previousSaturation := newLimiter.Saturation()
newLimiter.TryAccept()
// It's a fake limiter
if newLimiter.Saturation() == previousSaturation {
break
}
}
q.limiter.Stop()
q.limiter = newLimiter
}
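// Illustrative sketch, not part of the upstream file: swapping to QPS 0
// installs a never-accepting limiter (evictions paused), while any positive
// QPS installs a token bucket with EvictionRateLimiterBurst.
func exampleSwapLimiter(q *RateLimitedTimedQueue) {
	q.SwapLimiter(0)   // pause processing entirely
	q.SwapLimiter(0.1) // resume at 0.1 accepted actions per second
}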

View File

@ -0,0 +1,334 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"reflect"
"testing"
"time"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol"
)
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
for i := 0; i < len(lhs); i++ {
if rhs[i].Value != lhs[i] {
return false
}
}
return true
}
func CheckSetEq(lhs, rhs sets.String) bool {
return lhs.HasAll(rhs.List()...) && rhs.HasAll(lhs.List()...)
}
func TestAddNode(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
queuePattern := []string{"first", "second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := sets.NewString("first", "second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestDelNode(t *testing.T) {
defer func() { now = time.Now }()
var tick int64
now = func() time.Time {
t := time.Unix(tick, 0)
tick++
return t
}
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("first")
queuePattern := []string{"second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := sets.NewString("second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
queuePattern = []string{"first", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = sets.NewString("first", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("third")
queuePattern = []string{"first", "second"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = sets.NewString("first", "second")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
setPattern := sets.NewString("first", "third")
if len(deletedMap) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, deletedMap) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
}
func TestTryOrdering(t *testing.T) {
defer func() { now = time.Now }()
current := time.Unix(0, 0)
delay := 0
// the current time is incremented by 1ms every time now is invoked
now = func() time.Time {
if delay > 0 {
delay--
} else {
current = current.Add(time.Millisecond)
}
t.Logf("time %d", current.UnixNano())
return current
}
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
order := []string{}
count := 0
hasQueued := false
evictor.Try(func(value TimedValue) (bool, time.Duration) {
count++
t.Logf("eviction %d", count)
if value.ProcessAt.IsZero() {
t.Fatalf("processAt should not be zero")
}
switch value.Value {
case "first":
if !value.AddedAt.Equal(time.Unix(0, time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
case "second":
if !value.AddedAt.Equal(time.Unix(0, 2*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
if hasQueued {
if !value.ProcessAt.Equal(time.Unix(0, 6*time.Millisecond.Nanoseconds())) {
t.Fatalf("process time for %s is %v", value.Value, value.ProcessAt)
}
break
}
hasQueued = true
delay = 1
t.Logf("going to delay")
return false, 2 * time.Millisecond
case "third":
if !value.AddedAt.Equal(time.Unix(0, 3*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
}
order = append(order, value.Value)
return true, 0
})
if !reflect.DeepEqual(order, []string{"first", "third"}) {
t.Fatalf("order was wrong: %v", order)
}
if count != 3 {
t.Fatalf("unexpected iterations: %d", count)
}
}
func TestTryRemovingWhileTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
processing := make(chan struct{})
wait := make(chan struct{})
order := []string{}
count := 0
queued := false
// while the Try function is processing "second", remove it from the queue
// we should not see "second" retried.
go func() {
<-processing
evictor.Remove("second")
close(wait)
}()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
count++
if value.AddedAt.IsZero() {
t.Fatalf("added should not be zero")
}
if value.ProcessAt.IsZero() {
t.Fatalf("next should not be zero")
}
if !queued && value.Value == "second" {
queued = true
close(processing)
<-wait
return false, time.Millisecond
}
order = append(order, value.Value)
return true, 0
})
if !reflect.DeepEqual(order, []string{"first", "third"}) {
t.Fatalf("order was wrong: %v", order)
}
if count != 3 {
t.Fatalf("unexpected iterations: %d", count)
}
}
func TestClear(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Clear()
if len(evictor.queue.queue) != 0 {
t.Fatalf("Clear should remove all elements from the queue.")
}
}
func TestSwapLimiter(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
fakeAlways := flowcontrol.NewFakeAlwaysRateLimiter()
qps := evictor.limiter.QPS()
if qps != fakeAlways.QPS() {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, fakeAlways.QPS())
}
evictor.SwapLimiter(0)
qps = evictor.limiter.QPS()
fakeNever := flowcontrol.NewFakeNeverRateLimiter()
if qps != fakeNever.QPS() {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, fakeNever.QPS())
}
createdQPS := float32(5.5)
evictor.SwapLimiter(createdQPS)
qps = evictor.limiter.QPS()
if qps != createdQPS {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, createdQPS)
}
}
func TestAddAfterTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
setPattern := sets.NewString("first", "third")
if len(deletedMap) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, deletedMap) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
evictor.Add("first", "11111")
evictor.Try(func(value TimedValue) (bool, time.Duration) {
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
return true, 0
})
}

View File

@ -0,0 +1,436 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"sync"
"time"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/apis/core/helper"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"github.com/golang/glog"
)
const (
nodeUpdateChannelSize = 10
podUpdateChannelSize = 1
retries = 5
)
// Needed to make workqueue work
type updateItemInterface interface{}
type nodeUpdateItem struct {
oldNode *v1.Node
newNode *v1.Node
newTaints []v1.Taint
}
type podUpdateItem struct {
oldPod *v1.Pod
newPod *v1.Pod
newTolerations []v1.Toleration
}
// NoExecuteTaintManager listens to Taint/Toleration changes and is responsible for removing Pods
// from Nodes tainted with NoExecute Taints.
type NoExecuteTaintManager struct {
client clientset.Interface
recorder record.EventRecorder
taintEvictionQueue *TimedWorkerQueue
// keeps a map from nodeName to all noExecute taints on that Node
taintedNodesLock sync.Mutex
taintedNodes map[string][]v1.Taint
nodeUpdateChannel chan *nodeUpdateItem
podUpdateChannel chan *podUpdateItem
nodeUpdateQueue workqueue.Interface
podUpdateQueue workqueue.Interface
}
func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(args *WorkArgs) error {
return func(args *WorkArgs) error {
ns := args.NamespacedName.Namespace
name := args.NamespacedName.Name
glog.V(0).Infof("NoExecuteTaintManager is deleting Pod: %v", args.NamespacedName.String())
if emitEventFunc != nil {
emitEventFunc(args.NamespacedName)
}
var err error
for i := 0; i < retries; i++ {
err = c.CoreV1().Pods(ns).Delete(name, &metav1.DeleteOptions{})
if err == nil {
break
}
time.Sleep(10 * time.Millisecond)
}
return err
}
}
func getNoExecuteTaints(taints []v1.Taint) []v1.Taint {
result := []v1.Taint{}
for i := range taints {
if taints[i].Effect == v1.TaintEffectNoExecute {
result = append(result, taints[i])
}
}
return result
}
func getPodsAssignedToNode(c clientset.Interface, nodeName string) ([]v1.Pod, error) {
selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
pods, err := c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
FieldSelector: selector.String(),
LabelSelector: labels.Everything().String(),
})
for i := 0; i < retries && err != nil; i++ {
pods, err = c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
FieldSelector: selector.String(),
LabelSelector: labels.Everything().String(),
})
time.Sleep(100 * time.Millisecond)
}
if err != nil {
return []v1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
}
return pods.Items, nil
}
// getMinTolerationTime returns the minimal toleration time from the given slice, or a negative duration to denote infinite toleration.
func getMinTolerationTime(tolerations []v1.Toleration) time.Duration {
minTolerationTime := int64(-1)
if len(tolerations) == 0 {
return 0
}
for i := range tolerations {
if tolerations[i].TolerationSeconds != nil {
tolerationSeconds := *(tolerations[i].TolerationSeconds)
if tolerationSeconds <= 0 {
return 0
} else if tolerationSeconds < minTolerationTime || minTolerationTime == -1 {
minTolerationTime = tolerationSeconds
}
}
}
return time.Duration(minTolerationTime) * time.Second
}
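// Illustrative sketch, not part of the upstream file: with one 30s toleration
// and one unbounded toleration, the minimum is 30s; a negative result would
// mean every toleration taken into account is unbounded.
func exampleMinTolerationTime() time.Duration {
	thirty := int64(30)
	return getMinTolerationTime([]v1.Toleration{
		{TolerationSeconds: &thirty},
		{TolerationSeconds: nil}, // tolerate forever; ignored for the minimum
	}) // 30 * time.Second
}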
// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to
// communicate with the API server.
func NewNoExecuteTaintManager(c clientset.Interface) *NoExecuteTaintManager {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
eventBroadcaster.StartLogging(glog.Infof)
if c != nil {
glog.V(0).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(c.CoreV1().RESTClient()).Events("")})
} else {
glog.Fatalf("kubeClient is nil when starting NodeController")
}
tm := &NoExecuteTaintManager{
client: c,
recorder: recorder,
taintedNodes: make(map[string][]v1.Taint),
nodeUpdateChannel: make(chan *nodeUpdateItem, nodeUpdateChannelSize),
podUpdateChannel: make(chan *podUpdateItem, podUpdateChannelSize),
nodeUpdateQueue: workqueue.New(),
podUpdateQueue: workqueue.New(),
}
tm.taintEvictionQueue = CreateWorkerQueue(deletePodHandler(c, tm.emitPodDeletionEvent))
return tm
}
// Run starts NoExecuteTaintManager which will run in a loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
glog.V(0).Infof("Starting NoExecuteTaintManager")
// Functions that are responsible for taking work items out of the workqueues and putting them
// into channels.
go func(stopCh <-chan struct{}) {
for {
item, shutdown := tc.nodeUpdateQueue.Get()
if shutdown {
break
}
nodeUpdate := item.(*nodeUpdateItem)
select {
case <-stopCh:
break
case tc.nodeUpdateChannel <- nodeUpdate:
}
}
}(stopCh)
go func(stopCh <-chan struct{}) {
for {
item, shutdown := tc.podUpdateQueue.Get()
if shutdown {
break
}
podUpdate := item.(*podUpdateItem)
select {
case <-stopCh:
break
case tc.podUpdateChannel <- podUpdate:
}
}
}(stopCh)
// When processing events we want to prioritize Node updates over Pod updates,
// as NodeUpdates that interest NoExecuteTaintManager should be handled as soon as possible -
// we don't want user (or system) to wait until PodUpdate queue is drained before it can
// start evicting Pods from tainted Nodes.
for {
select {
case <-stopCh:
break
case nodeUpdate := <-tc.nodeUpdateChannel:
tc.handleNodeUpdate(nodeUpdate)
case podUpdate := <-tc.podUpdateChannel:
// If we found a Pod update we need to empty Node queue first.
priority:
for {
select {
case nodeUpdate := <-tc.nodeUpdateChannel:
tc.handleNodeUpdate(nodeUpdate)
default:
break priority
}
}
// After Node queue is emptied we process podUpdate.
tc.handlePodUpdate(podUpdate)
}
}
}
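// Illustrative wiring sketch, not part of the upstream file: the NodeController
// normally constructs the manager, starts Run, and forwards informer events to
// NodeUpdated/PodUpdated. The function name here is hypothetical.
func exampleRunTaintManager(c clientset.Interface, oldNode, newNode *v1.Node, stopCh <-chan struct{}) {
	tm := NewNoExecuteTaintManager(c)
	go tm.Run(stopCh)
	tm.NodeUpdated(oldNode, newNode) // queued; handled before any pending pod updates
}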
// PodUpdated is used to notify NoExecuteTaintManager about Pod changes.
func (tc *NoExecuteTaintManager) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) {
oldTolerations := []v1.Toleration{}
if oldPod != nil {
oldTolerations = oldPod.Spec.Tolerations
}
newTolerations := []v1.Toleration{}
if newPod != nil {
newTolerations = newPod.Spec.Tolerations
}
if oldPod != nil && newPod != nil && helper.Semantic.DeepEqual(oldTolerations, newTolerations) && oldPod.Spec.NodeName == newPod.Spec.NodeName {
return
}
updateItem := &podUpdateItem{
oldPod: oldPod,
newPod: newPod,
newTolerations: newTolerations,
}
tc.podUpdateQueue.Add(updateItemInterface(updateItem))
}
// NodeUpdated is used to notify NoExecuteTaintManager about Node changes.
func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) {
oldTaints := []v1.Taint{}
if oldNode != nil {
oldTaints = oldNode.Spec.Taints
}
oldTaints = getNoExecuteTaints(oldTaints)
newTaints := []v1.Taint{}
if newNode != nil {
newTaints = newNode.Spec.Taints
}
newTaints = getNoExecuteTaints(newTaints)
if oldNode != nil && newNode != nil && helper.Semantic.DeepEqual(oldTaints, newTaints) {
return
}
updateItem := &nodeUpdateItem{
oldNode: oldNode,
newNode: newNode,
newTaints: newTaints,
}
tc.nodeUpdateQueue.Add(updateItemInterface(updateItem))
}
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
if tc.taintEvictionQueue.CancelWork(nsName.String()) {
tc.emitCancelPodDeletionEvent(nsName)
}
}
func (tc *NoExecuteTaintManager) processPodOnNode(
podNamespacedName types.NamespacedName,
nodeName string,
tolerations []v1.Toleration,
taints []v1.Taint,
now time.Time,
) {
if len(taints) == 0 {
tc.cancelWorkWithEvent(podNamespacedName)
}
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
if !allTolerated {
glog.V(2).Infof("Not all taints are tolerated after update for Pod %v on %v", podNamespacedName.String(), nodeName)
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
tc.cancelWorkWithEvent(podNamespacedName)
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
return
}
minTolerationTime := getMinTolerationTime(usedTolerations)
// getMinTolerationTime returns a negative value to denote infinite toleration.
if minTolerationTime < 0 {
glog.V(4).Infof("New tolerations for %v tolerate forever. Scheduled deletion won't be cancelled if already scheduled.", podNamespacedName.String())
return
}
startTime := now
triggerTime := startTime.Add(minTolerationTime)
scheduledEviction := tc.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String())
if scheduledEviction != nil {
startTime = scheduledEviction.CreatedAt
if startTime.Add(minTolerationTime).Before(triggerTime) {
return
}
tc.cancelWorkWithEvent(podNamespacedName)
}
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
}
func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate *podUpdateItem) {
// Delete
if podUpdate.newPod == nil {
pod := podUpdate.oldPod
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
glog.V(4).Infof("Noticed pod deletion: %#v", podNamespacedName)
tc.cancelWorkWithEvent(podNamespacedName)
return
}
// Create or Update
pod := podUpdate.newPod
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
glog.V(4).Infof("Noticed pod update: %#v", podNamespacedName)
nodeName := pod.Spec.NodeName
if nodeName == "" {
return
}
taints, ok := func() ([]v1.Taint, bool) {
tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock()
taints, ok := tc.taintedNodes[nodeName]
return taints, ok
}()
// It's possible that Node was deleted, or Taints were removed before, which triggered
// eviction cancelling if it was needed.
if !ok {
return
}
tc.processPodOnNode(podNamespacedName, nodeName, podUpdate.newTolerations, taints, time.Now())
}
func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate *nodeUpdateItem) {
// Delete
if nodeUpdate.newNode == nil {
node := nodeUpdate.oldNode
glog.V(4).Infof("Noticed node deletion: %#v", node.Name)
tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock()
delete(tc.taintedNodes, node.Name)
return
}
// Create or Update
glog.V(4).Infof("Noticed node update: %#v", nodeUpdate)
node := nodeUpdate.newNode
taints := nodeUpdate.newTaints
func() {
tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock()
glog.V(4).Infof("Updating known taints on node %v: %v", node.Name, taints)
if len(taints) == 0 {
delete(tc.taintedNodes, node.Name)
} else {
tc.taintedNodes[node.Name] = taints
}
}()
pods, err := getPodsAssignedToNode(tc.client, node.Name)
if err != nil {
glog.Errorf(err.Error())
return
}
if len(pods) == 0 {
return
}
// Short circuit, to make this controller a bit faster.
if len(taints) == 0 {
glog.V(4).Infof("All taints were removed from the Node %v. Cancelling all evictions...", node.Name)
for i := range pods {
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
}
return
}
now := time.Now()
for i := range pods {
pod := &pods[i]
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
tc.processPodOnNode(podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)
}
}
func (tc *NoExecuteTaintManager) emitPodDeletionEvent(nsName types.NamespacedName) {
if tc.recorder == nil {
return
}
ref := &v1.ObjectReference{
Kind: "Pod",
Name: nsName.Name,
Namespace: nsName.Namespace,
}
tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Marking for deletion Pod %s", nsName.String())
}
func (tc *NoExecuteTaintManager) emitCancelPodDeletionEvent(nsName types.NamespacedName) {
if tc.recorder == nil {
return
}
ref := &v1.ObjectReference{
Kind: "Pod",
Name: nsName.Name,
Namespace: nsName.Namespace,
}
tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Cancelling deletion of Pod %s", nsName.String())
}

View File

@ -0,0 +1,604 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"sort"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/controller/testutil"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clienttesting "k8s.io/client-go/testing"
)
var timeForControllerToProgress = 500 * time.Millisecond
func createNoExecuteTaint(index int) v1.Taint {
now := metav1.Now()
return v1.Taint{
Key: "testTaint" + fmt.Sprintf("%v", index),
Value: "test" + fmt.Sprintf("%v", index),
Effect: v1.TaintEffectNoExecute,
TimeAdded: &now,
}
}
func addToleration(pod *v1.Pod, index int, duration int64) *v1.Pod {
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
if duration < 0 {
pod.Spec.Tolerations = []v1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoExecute}}
} else {
pod.Spec.Tolerations = []v1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoExecute, TolerationSeconds: &duration}}
}
return pod
}
func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
taints := []v1.Taint{}
for _, index := range indices {
taints = append(taints, createNoExecuteTaint(index))
}
node.Spec.Taints = taints
return node
}
type timestampedPod struct {
names []string
timestamp time.Duration
}
type durationSlice []timestampedPod
func (a durationSlice) Len() int { return len(a) }
func (a durationSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp }
func TestFilterNoExecuteTaints(t *testing.T) {
taints := []v1.Taint{
{
Key: "one",
Value: "one",
Effect: v1.TaintEffectNoExecute,
},
{
Key: "two",
Value: "two",
Effect: v1.TaintEffectNoSchedule,
},
}
taints = getNoExecuteTaints(taints)
if len(taints) != 1 || taints[0].Key != "one" {
t.Errorf("Filtering doesn't work. Got %v", taints)
}
}
func TestCreatePod(t *testing.T) {
testCases := []struct {
description string
pod *v1.Pod
taintedNodes map[string][]v1.Taint
expectDelete bool
}{
{
description: "not scheduled - ignore",
pod: testutil.NewPod("pod1", ""),
taintedNodes: map[string][]v1.Taint{},
expectDelete: false,
},
{
description: "scheduled on untainted Node",
pod: testutil.NewPod("pod1", "node1"),
taintedNodes: map[string][]v1.Taint{},
expectDelete: false,
},
{
description: "schedule on tainted Node",
pod: testutil.NewPod("pod1", "node1"),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: true,
},
{
description: "schedule on tainted Node with finite toleration",
pod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: false,
},
{
description: "schedule on tainted Node with infinite toleration",
pod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: false,
},
{
description: "schedule on tainted Node with infinite ivalid toleration",
pod: addToleration(testutil.NewPod("pod1", "node1"), 2, -1),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: true,
},
}
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.taintedNodes = item.taintedNodes
controller.PodUpdated(nil, item.pod)
// wait a bit
time.Sleep(timeForControllerToProgress)
podDeleted := false
for _, action := range fakeClientset.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
podDeleted = true
}
}
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexepected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
}
}
func TestDeletePod(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.taintedNodes = map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
}
controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil)
// wait a bit to see if nothing will panic
time.Sleep(timeForControllerToProgress)
close(stopCh)
}
func TestUpdatePod(t *testing.T) {
testCases := []struct {
description string
prevPod *v1.Pod
newPod *v1.Pod
taintedNodes map[string][]v1.Taint
expectDelete bool
additionalSleep time.Duration
}{
{
description: "scheduling onto tainted Node",
prevPod: testutil.NewPod("pod1", ""),
newPod: testutil.NewPod("pod1", "node1"),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: true,
},
{
description: "scheduling onto tainted Node with toleration",
prevPod: addToleration(testutil.NewPod("pod1", ""), 1, -1),
newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: false,
},
{
description: "removing toleration",
prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
newPod: testutil.NewPod("pod1", "node1"),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: true,
},
{
description: "lengthening toleration shouldn't work",
prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
taintedNodes: map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
},
expectDelete: true,
additionalSleep: 1500 * time.Millisecond,
},
}
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.taintedNodes = item.taintedNodes
controller.PodUpdated(nil, item.prevPod)
fakeClientset.ClearActions()
time.Sleep(timeForControllerToProgress)
controller.PodUpdated(item.prevPod, item.newPod)
// wait a bit
time.Sleep(timeForControllerToProgress)
if item.additionalSleep > 0 {
time.Sleep(item.additionalSleep)
}
podDeleted := false
for _, action := range fakeClientset.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
podDeleted = true
}
}
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexepected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
}
}
func TestCreateNode(t *testing.T) {
testCases := []struct {
description string
pods []v1.Pod
node *v1.Node
expectDelete bool
}{
{
description: "Creating Node maching already assigned Pod",
pods: []v1.Pod{
*testutil.NewPod("pod1", "node1"),
},
node: testutil.NewNode("node1"),
expectDelete: false,
},
{
description: "Creating tainted Node maching already assigned Pod",
pods: []v1.Pod{
*testutil.NewPod("pod1", "node1"),
},
node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectDelete: true,
},
{
description: "Creating tainted Node maching already assigned tolerating Pod",
pods: []v1.Pod{
*addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
},
node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectDelete: false,
},
}
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(nil, item.node)
// wait a bit
time.Sleep(timeForControllerToProgress)
podDeleted := false
for _, action := range fakeClientset.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
podDeleted = true
}
}
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexepected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
}
}
func TestDeleteNode(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
controller.taintedNodes = map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
}
go controller.Run(stopCh)
controller.NodeUpdated(testutil.NewNode("node1"), nil)
// wait a bit to see if nothing will panic
time.Sleep(timeForControllerToProgress)
controller.taintedNodesLock.Lock()
if _, ok := controller.taintedNodes["node1"]; ok {
t.Error("Node should have been deleted from taintedNodes list")
}
controller.taintedNodesLock.Unlock()
close(stopCh)
}
func TestUpdateNode(t *testing.T) {
testCases := []struct {
description string
pods []v1.Pod
oldNode *v1.Node
newNode *v1.Node
expectDelete bool
additionalSleep time.Duration
}{
{
description: "Added taint",
pods: []v1.Pod{
*testutil.NewPod("pod1", "node1"),
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectDelete: true,
},
{
description: "Added tolerated taint",
pods: []v1.Pod{
*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectDelete: false,
},
{
description: "Only one added taint tolerated",
pods: []v1.Pod{
*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
expectDelete: true,
},
{
description: "Taint removed",
pods: []v1.Pod{
*addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
},
oldNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
newNode: testutil.NewNode("node1"),
expectDelete: false,
additionalSleep: 1500 * time.Millisecond,
},
{
description: "Pod with multiple tolerations are evicted when first one runs out",
pods: []v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Name: "pod1",
},
Spec: v1.PodSpec{
NodeName: "node1",
Tolerations: []v1.Toleration{
{Key: "testTaint1", Value: "test1", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]},
{Key: "testTaint2", Value: "test2", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]},
},
},
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
Status: v1.ConditionTrue,
},
},
},
},
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
expectDelete: true,
additionalSleep: 1500 * time.Millisecond,
},
}
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(item.oldNode, item.newNode)
// wait a bit
time.Sleep(timeForControllerToProgress)
if item.additionalSleep > 0 {
time.Sleep(item.additionalSleep)
}
podDeleted := false
for _, action := range fakeClientset.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
podDeleted = true
}
}
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexepected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
}
}
func TestUpdateNodeWithMultiplePods(t *testing.T) {
testCases := []struct {
description string
pods []v1.Pod
oldNode *v1.Node
newNode *v1.Node
expectedDeleteTimes durationSlice
}{
{
description: "Pods with different toleration times are evicted appropriately",
pods: []v1.Pod{
*testutil.NewPod("pod1", "node1"),
*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
expectedDeleteTimes: durationSlice{
{[]string{"pod1"}, 0},
{[]string{"pod2"}, time.Second},
},
},
{
description: "Evict all pods not maching all taints instantly",
pods: []v1.Pod{
*testutil.NewPod("pod1", "node1"),
*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
},
oldNode: testutil.NewNode("node1"),
newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
expectedDeleteTimes: durationSlice{
{[]string{"pod1", "pod2", "pod3"}, 0},
},
},
}
for _, item := range testCases {
t.Logf("Starting testcase %q", item.description)
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
sort.Sort(item.expectedDeleteTimes)
controller := NewNoExecuteTaintManager(fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(item.oldNode, item.newNode)
startedAt := time.Now()
for i := range item.expectedDeleteTimes {
if i == 0 || item.expectedDeleteTimes[i-1].timestamp != item.expectedDeleteTimes[i].timestamp {
// compute a grace duration to give controller time to process updates. Choose big
// enough intervals in the test cases above to avoid flakes.
var increment time.Duration
if i == len(item.expectedDeleteTimes)-1 || item.expectedDeleteTimes[i+1].timestamp == item.expectedDeleteTimes[i].timestamp {
increment = 500 * time.Millisecond
} else {
increment = ((item.expectedDeleteTimes[i+1].timestamp - item.expectedDeleteTimes[i].timestamp) / time.Duration(2))
}
sleepTime := item.expectedDeleteTimes[i].timestamp - time.Since(startedAt) + increment
if sleepTime < 0 {
sleepTime = 0
}
t.Logf("Sleeping for %v", sleepTime)
time.Sleep(sleepTime)
}
for delay, podName := range item.expectedDeleteTimes[i].names {
deleted := false
for _, action := range fakeClientset.Actions() {
deleteAction, ok := action.(clienttesting.DeleteActionImpl)
if !ok {
t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
continue
}
if deleteAction.GetResource().Resource != "pods" {
continue
}
if podName == deleteAction.GetName() {
deleted = true
}
}
if !deleted {
t.Errorf("Failed to deleted pod %v after %v", podName, delay)
}
}
for _, action := range fakeClientset.Actions() {
deleteAction, ok := action.(clienttesting.DeleteActionImpl)
if !ok {
t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
continue
}
if deleteAction.GetResource().Resource != "pods" {
continue
}
deletedPodName := deleteAction.GetName()
expected := false
for _, podName := range item.expectedDeleteTimes[i].names {
if podName == deletedPodName {
expected = true
}
}
if !expected {
t.Errorf("Pod %v was deleted even though it shouldn't have", deletedPodName)
}
}
fakeClientset.ClearActions()
}
close(stopCh)
}
}
func TestGetMinTolerationTime(t *testing.T) {
one := int64(1)
oneSec := 1 * time.Second
tests := []struct {
tolerations []v1.Toleration
expected time.Duration
}{
{
tolerations: []v1.Toleration{},
expected: 0,
},
{
tolerations: []v1.Toleration{
{
TolerationSeconds: &one,
},
{
TolerationSeconds: nil,
},
},
expected: oneSec,
},
{
tolerations: []v1.Toleration{
{
TolerationSeconds: nil,
},
{
TolerationSeconds: &one,
},
},
expected: oneSec,
},
}
for _, test := range tests {
got := getMinTolerationTime(test.tolerations)
if got != test.expected {
t.Errorf("Incorrect min toleration time: got %v, expected %v", got, test.expected)
}
}
}

View File

@ -0,0 +1,145 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"sync"
"time"
"k8s.io/apimachinery/pkg/types"
"github.com/golang/glog"
)
// WorkArgs keeps arguments that will be passed to the function executed by the worker.
type WorkArgs struct {
NamespacedName types.NamespacedName
}
// KeyFromWorkArgs creates a key for the given `WorkArgs`
func (w *WorkArgs) KeyFromWorkArgs() string {
return w.NamespacedName.String()
}
// NewWorkArgs is a helper function to create new `WorkArgs`
func NewWorkArgs(name, namespace string) *WorkArgs {
return &WorkArgs{types.NamespacedName{Namespace: namespace, Name: name}}
}
// TimedWorker is responsible for executing a function no earlier than the FireAt time.
type TimedWorker struct {
WorkItem *WorkArgs
CreatedAt time.Time
FireAt time.Time
Timer *time.Timer
}
// CreateWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func CreateWorker(args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(args *WorkArgs) error) *TimedWorker {
delay := fireAt.Sub(createdAt)
if delay <= 0 {
go f(args)
return nil
}
timer := time.AfterFunc(delay, func() { f(args) })
return &TimedWorker{
WorkItem: args,
CreatedAt: createdAt,
FireAt: fireAt,
Timer: timer,
}
}
// Cancel cancels the execution of function by the `TimedWorker`
func (w *TimedWorker) Cancel() {
if w != nil {
w.Timer.Stop()
}
}
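// Illustrative sketch, not part of the upstream file: a worker created with a
// future fireAt runs on its own timer and can be cancelled; a past or equal
// fireAt runs f immediately and CreateWorker returns nil.
func exampleTimedWorker(evict func(*WorkArgs) error) {
	now := time.Now()
	w := CreateWorker(NewWorkArgs("pod1", "ns1"), now, now.Add(30*time.Second), evict)
	// ... later, the eviction is no longer needed:
	w.Cancel() // safe even on a nil *TimedWorker thanks to the receiver check
}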
// TimedWorkerQueue keeps a set of TimedWorkers that are still waiting for execution.
type TimedWorkerQueue struct {
sync.Mutex
// map of workers keyed by string returned by 'KeyFromWorkArgs' from the given worker.
workers map[string]*TimedWorker
workFunc func(args *WorkArgs) error
}
// CreateWorkerQueue creates a new TimedWorkerQueue for workers that will execute
// given function `f`.
func CreateWorkerQueue(f func(args *WorkArgs) error) *TimedWorkerQueue {
return &TimedWorkerQueue{
workers: make(map[string]*TimedWorker),
workFunc: f,
}
}
func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(args *WorkArgs) error {
return func(args *WorkArgs) error {
err := q.workFunc(args)
q.Lock()
defer q.Unlock()
if err == nil {
// To avoid duplicated calls we keep the key in the queue, to prevent
// subsequent additions.
q.workers[key] = nil
} else {
delete(q.workers, key)
}
return err
}
}
// AddWork adds a work item to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(args *WorkArgs, createdAt time.Time, fireAt time.Time) {
key := args.KeyFromWorkArgs()
glog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
q.Lock()
defer q.Unlock()
if _, exists := q.workers[key]; exists {
glog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
return
}
worker := CreateWorker(args, createdAt, fireAt, q.getWrappedWorkerFunc(key))
q.workers[key] = worker
}
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
func (q *TimedWorkerQueue) CancelWork(key string) bool {
q.Lock()
defer q.Unlock()
worker, found := q.workers[key]
result := false
if found {
glog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
if worker != nil {
result = true
worker.Cancel()
}
delete(q.workers, key)
}
return result
}
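// Illustrative sketch, not part of the upstream file: the queue runs a single
// handler per key; re-adding the same key is skipped (even after the work ran,
// because a successful key stays mapped to nil until it is cancelled).
func exampleTimedWorkerQueue(evict func(*WorkArgs) error) {
	q := CreateWorkerQueue(evict)
	now := time.Now()
	q.AddWork(NewWorkArgs("pod1", "ns1"), now, now.Add(time.Minute))
	q.AddWork(NewWorkArgs("pod1", "ns1"), now, now.Add(time.Minute)) // skipped: already queued
	q.CancelWork(NewWorkArgs("pod1", "ns1").KeyFromWorkArgs())       // true: timer stopped
}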
// GetWorkerUnsafe returns a TimedWorker corresponding to the given key.
// Unsafe method - workers have attached goroutines which can fire after this function is called.
func (q *TimedWorkerQueue) GetWorkerUnsafe(key string) *TimedWorker {
q.Lock()
defer q.Unlock()
return q.workers[key]
}

View File

@ -0,0 +1,141 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"sync"
"sync/atomic"
"testing"
"time"
)
func TestExecute(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(5)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
})
now := time.Now()
queue.AddWork(NewWorkArgs("1", "1"), now, now)
queue.AddWork(NewWorkArgs("2", "2"), now, now)
queue.AddWork(NewWorkArgs("3", "3"), now, now)
queue.AddWork(NewWorkArgs("4", "4"), now, now)
queue.AddWork(NewWorkArgs("5", "5"), now, now)
// Adding the same thing second time should be no-op
queue.AddWork(NewWorkArgs("1", "1"), now, now)
queue.AddWork(NewWorkArgs("2", "2"), now, now)
queue.AddWork(NewWorkArgs("3", "3"), now, now)
queue.AddWork(NewWorkArgs("4", "4"), now, now)
queue.AddWork(NewWorkArgs("5", "5"), now, now)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
if lastVal != 5 {
t.Errorf("Espected testVal = 5, got %v", lastVal)
}
}
func TestExecuteDelayed(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(5)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
})
now := time.Now()
then := now.Add(3 * time.Second)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
if lastVal != 5 {
t.Errorf("Espected testVal = 5, got %v", lastVal)
}
}
func TestCancel(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(3)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
})
now := time.Now()
then := now.Add(3 * time.Second)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
if lastVal != 3 {
t.Errorf("Espected testVal = 3, got %v", lastVal)
}
}
func TestCancelAndReadd(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(4)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
})
now := time.Now()
then := now.Add(3 * time.Second)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
queue.AddWork(NewWorkArgs("2", "2"), now, then)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
if lastVal != 4 {
t.Errorf("Espected testVal = 4, got %v", lastVal)
}
}

View File

@ -0,0 +1,44 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["controller_utils.go"],
importpath = "k8s.io/kubernetes/pkg/controller/node/util",
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/cloudprovider:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,295 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"errors"
"fmt"
"strings"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
extensionslisters "k8s.io/client-go/listers/extensions/v1beta1"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/kubelet/util/format"
nodepkg "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
)
var (
// ErrCloudInstance occurs when the cloud provider does not support
// the Instances API.
ErrCloudInstance = errors.New("cloud provider doesn't support instances")
)
// DeletePods will delete all pods from the master running on the given node,
// and return true if any pods were deleted, or were found pending
// deletion.
func DeletePods(kubeClient clientset.Interface, recorder record.EventRecorder, nodeName, nodeUID string, daemonStore extensionslisters.DaemonSetLister) (bool, error) {
remaining := false
selector := fields.OneTermEqualSelector(api.PodHostField, nodeName).String()
options := metav1.ListOptions{FieldSelector: selector}
pods, err := kubeClient.CoreV1().Pods(metav1.NamespaceAll).List(options)
var updateErrList []error
if err != nil {
return remaining, err
}
if len(pods.Items) > 0 {
RecordNodeEvent(recorder, nodeName, nodeUID, v1.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
}
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeName {
continue
}
// Set reason and message in the pod object.
if _, err = SetPodTerminationReason(kubeClient, &pod, nodeName); err != nil {
if apierrors.IsConflict(err) {
updateErrList = append(updateErrList,
fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err))
continue
}
}
// if the pod has already been marked for deletion, we still return true that there are remaining pods.
if pod.DeletionGracePeriodSeconds != nil {
remaining = true
continue
}
// if the pod is managed by a daemonset, ignore it
_, err := daemonStore.GetPodDaemonSets(&pod)
if err == nil { // No error means at least one daemonset was found
continue
}
glog.V(2).Infof("Starting deletion of pod %v/%v", pod.Namespace, pod.Name)
recorder.Eventf(&pod, v1.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
if err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
return false, err
}
remaining = true
}
if len(updateErrList) > 0 {
return false, utilerrors.NewAggregate(updateErrList)
}
return remaining, nil
}
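// Illustrative sketch, not part of the upstream file: the eviction path calls
// DeletePods roughly like this and keeps revisiting the node while pods remain
// pending deletion. The function name is hypothetical.
func exampleEvictNode(kubeClient clientset.Interface, recorder record.EventRecorder, node *v1.Node, daemonStore extensionslisters.DaemonSetLister) (bool, error) {
	remaining, err := DeletePods(kubeClient, recorder, node.Name, string(node.UID), daemonStore)
	if err != nil {
		return false, err
	}
	// remaining == true means some pods are still terminating; the caller should
	// not treat the eviction as complete yet.
	return remaining, nil
}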
// SetPodTerminationReason attempts to set a reason and message in the
// pod status, updates it in the apiserver, and returns an error if it
// encounters one.
func SetPodTerminationReason(kubeClient clientset.Interface, pod *v1.Pod, nodeName string) (*v1.Pod, error) {
if pod.Status.Reason == nodepkg.NodeUnreachablePodReason {
return pod, nil
}
pod.Status.Reason = nodepkg.NodeUnreachablePodReason
pod.Status.Message = fmt.Sprintf(nodepkg.NodeUnreachablePodMessage, nodeName, pod.Name)
var updatedPod *v1.Pod
var err error
if updatedPod, err = kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
return nil, err
}
return updatedPod, nil
}
// ForcefullyDeleteNode deletes the node immediately. The pods on the
// node are cleaned up by the podGC.
func ForcefullyDeleteNode(kubeClient clientset.Interface, nodeName string) error {
if err := kubeClient.CoreV1().Nodes().Delete(nodeName, nil); err != nil {
return fmt.Errorf("unable to delete node %q: %v", nodeName, err)
}
return nil
}
// MarkAllPodsNotReady updates, via the master, the ready status of all pods
// running on the given node, setting their PodReady condition to false. It
// returns an error summarizing any status updates that failed.
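// Illustrative call (kubeClient and node are assumed to be an initialized
// clientset.Interface and a *v1.Node):
//
//	if err := MarkAllPodsNotReady(kubeClient, node); err != nil {
//		utilruntime.HandleError(fmt.Errorf("unable to mark all pods NotReady on node %v: %v", node.Name, err))
//	}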
func MarkAllPodsNotReady(kubeClient clientset.Interface, node *v1.Node) error {
nodeName := node.Name
glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
opts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName).String()}
pods, err := kubeClient.CoreV1().Pods(metav1.NamespaceAll).List(opts)
if err != nil {
return err
}
errMsg := []string{}
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeName {
continue
}
for i, cond := range pod.Status.Conditions {
if cond.Type == v1.PodReady {
pod.Status.Conditions[i].Status = v1.ConditionFalse
glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
_, err := kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(&pod)
if err != nil {
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
errMsg = append(errMsg, fmt.Sprintf("%v", err))
}
break
}
}
}
if len(errMsg) == 0 {
return nil
}
return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}
// NodeExistsInCloudProvider returns true if the node exists in the
// cloud provider.
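// Illustrative check-and-cleanup flow (cloud, kubeClient and node are assumed
// to be an initialized cloudprovider.Interface, clientset.Interface and
// *v1.Node):
//
//	exists, err := NodeExistsInCloudProvider(cloud, types.NodeName(node.Name))
//	if err != nil {
//		utilruntime.HandleError(err)
//	} else if !exists {
//		if err := ForcefullyDeleteNode(kubeClient, node.Name); err != nil {
//			utilruntime.HandleError(err)
//		}
//	}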
func NodeExistsInCloudProvider(cloud cloudprovider.Interface, nodeName types.NodeName) (bool, error) {
instances, ok := cloud.Instances()
if !ok {
return false, fmt.Errorf("%v", ErrCloudInstance)
}
if _, err := instances.ExternalID(nodeName); err != nil {
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, err
}
return true, nil
}
// RecordNodeEvent records an event related to a node.
func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: nodeName,
UID: types.UID(nodeUID),
Namespace: "",
}
glog.V(2).Infof("Recording %s event message for node %s", event, nodeName)
recorder.Eventf(ref, eventtype, reason, "Node %s event: %s", nodeName, event)
}
// RecordNodeStatusChange records an event related to a node status change.
func RecordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, newStatus string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: node.UID,
Namespace: "",
}
glog.V(2).Infof("Recording status change %s event message for node %s", newStatus, node.Name)
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
recorder.Eventf(ref, v1.EventTypeNormal, newStatus, "Node %s status is now: %s", node.Name, newStatus)
}
// SwapNodeControllerTaint adds the taints in taintsToAdd to the given node and
// removes the taints in taintsToRemove from it, returning true on success and
// false otherwise.
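// An illustrative swap of two hypothetical taints (the taint keys are
// placeholders, not constants defined by this package; kubeClient and node
// are assumed to be an initialized clientset.Interface and *v1.Node):
//
//	notReady := &v1.Taint{Key: "example.com/not-ready", Effect: v1.TaintEffectNoExecute}
//	unreachable := &v1.Taint{Key: "example.com/unreachable", Effect: v1.TaintEffectNoExecute}
//	if SwapNodeControllerTaint(kubeClient, []*v1.Taint{unreachable}, []*v1.Taint{notReady}, node) {
//		glog.V(4).Infof("Replaced taint %q with %q on node %v", notReady.Key, unreachable.Key, node.Name)
//	}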
func SwapNodeControllerTaint(kubeClient clientset.Interface, taintsToAdd, taintsToRemove []*v1.Taint, node *v1.Node) bool {
for _, taintToAdd := range taintsToAdd {
now := metav1.Now()
taintToAdd.TimeAdded = &now
}
err := controller.AddOrUpdateTaintOnNode(kubeClient, node.Name, taintsToAdd...)
if err != nil {
utilruntime.HandleError(
fmt.Errorf(
"unable to taint %+v unresponsive Node %q: %v",
taintsToAdd,
node.Name,
err))
return false
}
glog.V(4).Infof("Added %+v Taint to Node %v", taintsToAdd, node.Name)
err = controller.RemoveTaintOffNode(kubeClient, node.Name, node, taintsToRemove...)
if err != nil {
utilruntime.HandleError(
fmt.Errorf(
"unable to remove %+v unneeded taint from unresponsive Node %q: %v",
taintsToRemove,
node.Name,
err))
return false
}
glog.V(4).Infof("Made sure that Node %v has no %+v Taint", node.Name, taintsToRemove)
return true
}
// CreateAddNodeHandler creates an add node handler.
func CreateAddNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
return func(originalObj interface{}) {
node := originalObj.(*v1.Node).DeepCopy()
if err := f(node); err != nil {
utilruntime.HandleError(fmt.Errorf("Error while processing Node Add: %v", err))
}
}
}
// CreateUpdateNodeHandler creates a node update handler.
func CreateUpdateNodeHandler(f func(oldNode, newNode *v1.Node) error) func(oldObj, newObj interface{}) {
return func(origOldObj, origNewObj interface{}) {
node := origNewObj.(*v1.Node).DeepCopy()
prevNode := origOldObj.(*v1.Node).DeepCopy()
if err := f(prevNode, node); err != nil {
utilruntime.HandleError(fmt.Errorf("Error while processing Node Update: %v", err))
}
}
}
// CreateDeleteNodeHandler creates a delete node handler.
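// Illustrative wiring of this constructor together with CreateAddNodeHandler
// and CreateUpdateNodeHandler above into a shared node informer (nodeInformer
// stands for any core/v1 NodeInformer; the callback bodies are placeholders):
//
//	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
//		AddFunc: CreateAddNodeHandler(func(node *v1.Node) error {
//			glog.V(4).Infof("Node %v added", node.Name)
//			return nil
//		}),
//		UpdateFunc: CreateUpdateNodeHandler(func(oldNode, newNode *v1.Node) error {
//			glog.V(4).Infof("Node %v updated", newNode.Name)
//			return nil
//		}),
//		DeleteFunc: CreateDeleteNodeHandler(func(node *v1.Node) error {
//			glog.V(4).Infof("Node %v deleted", node.Name)
//			return nil
//		}),
//	})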
func CreateDeleteNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
return func(originalObj interface{}) {
originalNode, isNode := originalObj.(*v1.Node)
// We can get DeletedFinalStateUnknown instead of *v1.Node here and
// we need to handle that correctly. #34692
if !isNode {
deletedState, ok := originalObj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Received unexpected object: %v", originalObj)
return
}
originalNode, ok = deletedState.Obj.(*v1.Node)
if !ok {
glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
return
}
}
node := originalNode.DeepCopy()
if err := f(node); err != nil {
utilruntime.HandleError(fmt.Errorf("Error while processing Node Delete: %v", err))
}
}
}