Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-13 10:33:35 +00:00)
vendor update for E2E framework
Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
vendor/k8s.io/kubernetes/pkg/controller/client_builder.go (generated, vendored, new file, 265 lines)
@@ -0,0 +1,265 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"fmt"
	"time"

	v1authenticationapi "k8s.io/api/authentication/v1"
	"k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	apiserverserviceaccount "k8s.io/apiserver/pkg/authentication/serviceaccount"
	clientset "k8s.io/client-go/kubernetes"
	v1authentication "k8s.io/client-go/kubernetes/typed/authentication/v1"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	watchtools "k8s.io/client-go/tools/watch"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/api/legacyscheme"
	api "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/serviceaccount"
)

// ControllerClientBuilder allows you to get clients and configs for controllers
// Please note a copy also exists in staging/src/k8s.io/cloud-provider/cloud.go
// TODO: Extract this into a separate controller utilities repo (issues/68947)
type ControllerClientBuilder interface {
	Config(name string) (*restclient.Config, error)
	ConfigOrDie(name string) *restclient.Config
	Client(name string) (clientset.Interface, error)
	ClientOrDie(name string) clientset.Interface
}

// SimpleControllerClientBuilder returns a fixed client with different user agents
type SimpleControllerClientBuilder struct {
	// ClientConfig is a skeleton config to clone and use as the basis for each controller client
	ClientConfig *restclient.Config
}

func (b SimpleControllerClientBuilder) Config(name string) (*restclient.Config, error) {
	clientConfig := *b.ClientConfig
	return restclient.AddUserAgent(&clientConfig, name), nil
}

func (b SimpleControllerClientBuilder) ConfigOrDie(name string) *restclient.Config {
	clientConfig, err := b.Config(name)
	if err != nil {
		klog.Fatal(err)
	}
	return clientConfig
}

func (b SimpleControllerClientBuilder) Client(name string) (clientset.Interface, error) {
	clientConfig, err := b.Config(name)
	if err != nil {
		return nil, err
	}
	return clientset.NewForConfig(clientConfig)
}

func (b SimpleControllerClientBuilder) ClientOrDie(name string) clientset.Interface {
	client, err := b.Client(name)
	if err != nil {
		klog.Fatal(err)
	}
	return client
}
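// Illustrative sketch (not part of the vendored file): a minimal program that
// uses SimpleControllerClientBuilder. The kubeconfig path and controller name
// are hypothetical.
//
//	package main
//
//	import (
//		"k8s.io/client-go/tools/clientcmd"
//		"k8s.io/kubernetes/pkg/controller"
//	)
//
//	func main() {
//		// Load a skeleton rest.Config from a kubeconfig (hypothetical path).
//		cfg, err := clientcmd.BuildConfigFromFlags("", "/etc/kubernetes/admin.conf")
//		if err != nil {
//			panic(err)
//		}
//		b := controller.SimpleControllerClientBuilder{ClientConfig: cfg}
//		// Each controller gets a client whose user agent carries its name,
//		// so apiserver logs and metrics can attribute requests per controller.
//		client := b.ClientOrDie("deployment-controller")
//		_ = client
//	}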
// SAControllerClientBuilder is a ControllerClientBuilder that returns clients identifying as
// service accounts
type SAControllerClientBuilder struct {
	// ClientConfig is a skeleton config to clone and use as the basis for each controller client
	ClientConfig *restclient.Config

	// CoreClient is used to provision service accounts if needed and watch for their associated tokens
	// to construct a controller client
	CoreClient v1core.CoreV1Interface

	// AuthenticationClient is used to check API tokens to make sure they are valid before
	// building a controller client from them
	AuthenticationClient v1authentication.AuthenticationV1Interface

	// Namespace is the namespace used to host the service accounts that will back the
	// controllers. It must be a highly privileged namespace which normal users cannot inspect.
	Namespace string
}

// Config returns a complete clientConfig for constructing clients. This is separate in anticipation of composition,
// which means that not all clientsets are known here
func (b SAControllerClientBuilder) Config(name string) (*restclient.Config, error) {
	sa, err := b.getOrCreateServiceAccount(name)
	if err != nil {
		return nil, err
	}

	var clientConfig *restclient.Config
	fieldSelector := fields.SelectorFromSet(map[string]string{
		api.SecretTypeField: string(v1.SecretTypeServiceAccountToken),
	}).String()
	lw := &cache.ListWatch{
		ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
			options.FieldSelector = fieldSelector
			return b.CoreClient.Secrets(b.Namespace).List(options)
		},
		WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
			options.FieldSelector = fieldSelector
			return b.CoreClient.Secrets(b.Namespace).Watch(options)
		},
	}
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	_, err = watchtools.UntilWithSync(ctx, lw, &v1.Secret{}, nil,
		func(event watch.Event) (bool, error) {
			switch event.Type {
			case watch.Deleted:
				return false, nil
			case watch.Error:
				return false, fmt.Errorf("error watching")

			case watch.Added, watch.Modified:
				secret, ok := event.Object.(*v1.Secret)
				if !ok {
					return false, fmt.Errorf("unexpected object type: %T", event.Object)
				}
				if !serviceaccount.IsServiceAccountToken(secret, sa) {
					return false, nil
				}
				if len(secret.Data[v1.ServiceAccountTokenKey]) == 0 {
					return false, nil
				}
				validConfig, valid, err := b.getAuthenticatedConfig(sa, string(secret.Data[v1.ServiceAccountTokenKey]))
				if err != nil {
					klog.Warningf("error validating API token for %s/%s in secret %s: %v", sa.Namespace, sa.Name, secret.Name, err)
					// continue watching for good tokens
					return false, nil
				}
				if !valid {
					klog.Warningf("secret %s contained an invalid API token for %s/%s", secret.Name, sa.Namespace, sa.Name)
					// try to delete the secret containing the invalid token
					if err := b.CoreClient.Secrets(secret.Namespace).Delete(secret.Name, &metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
						klog.Warningf("error deleting secret %s containing invalid API token for %s/%s: %v", secret.Name, sa.Namespace, sa.Name, err)
					}
					// continue watching for good tokens
					return false, nil
				}
				clientConfig = validConfig
				return true, nil

			default:
				return false, fmt.Errorf("unexpected event type: %v", event.Type)
			}
		})
	if err != nil {
		return nil, fmt.Errorf("unable to get token for service account: %v", err)
	}

	return clientConfig, nil
}

func (b SAControllerClientBuilder) getOrCreateServiceAccount(name string) (*v1.ServiceAccount, error) {
	sa, err := b.CoreClient.ServiceAccounts(b.Namespace).Get(name, metav1.GetOptions{})
	if err == nil {
		return sa, nil
	}
	if !apierrors.IsNotFound(err) {
		return nil, err
	}

	// Create the namespace if we can't verify it exists.
	// Tolerate errors, since we don't know whether this component has namespace creation permissions.
	if _, err := b.CoreClient.Namespaces().Get(b.Namespace, metav1.GetOptions{}); err != nil {
		b.CoreClient.Namespaces().Create(&v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: b.Namespace}})
	}

	// Create the service account
	sa, err = b.CoreClient.ServiceAccounts(b.Namespace).Create(&v1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Namespace: b.Namespace, Name: name}})
	if apierrors.IsAlreadyExists(err) {
		// If we're racing to init and someone else already created it, re-fetch
		return b.CoreClient.ServiceAccounts(b.Namespace).Get(name, metav1.GetOptions{})
	}
	return sa, err
}

func (b SAControllerClientBuilder) getAuthenticatedConfig(sa *v1.ServiceAccount, token string) (*restclient.Config, bool, error) {
	username := apiserverserviceaccount.MakeUsername(sa.Namespace, sa.Name)

	clientConfig := restclient.AnonymousClientConfig(b.ClientConfig)
	clientConfig.BearerToken = token
	restclient.AddUserAgent(clientConfig, username)

	// Try token review first
	tokenReview := &v1authenticationapi.TokenReview{Spec: v1authenticationapi.TokenReviewSpec{Token: token}}
	if tokenResult, err := b.AuthenticationClient.TokenReviews().Create(tokenReview); err == nil {
		if !tokenResult.Status.Authenticated {
			klog.Warningf("Token for %s/%s did not authenticate correctly", sa.Namespace, sa.Name)
			return nil, false, nil
		}
		if tokenResult.Status.User.Username != username {
			klog.Warningf("Token for %s/%s authenticated as unexpected username: %s", sa.Namespace, sa.Name, tokenResult.Status.User.Username)
			return nil, false, nil
		}
		klog.V(4).Infof("Verified credential for %s/%s", sa.Namespace, sa.Name)
		return clientConfig, true, nil
	}

	// If we couldn't run the token review, the API might be disabled or we might not have permission.
	// Try to make a request to /apis with the token. If we get a 401 we should consider the token invalid.
	clientConfigCopy := *clientConfig
	clientConfigCopy.NegotiatedSerializer = legacyscheme.Codecs
	client, err := restclient.UnversionedRESTClientFor(&clientConfigCopy)
	if err != nil {
		return nil, false, err
	}
	err = client.Get().AbsPath("/apis").Do().Error()
	if apierrors.IsUnauthorized(err) {
		klog.Warningf("Token for %s/%s did not authenticate correctly: %v", sa.Namespace, sa.Name, err)
		return nil, false, nil
	}

	return clientConfig, true, nil
}

func (b SAControllerClientBuilder) ConfigOrDie(name string) *restclient.Config {
	clientConfig, err := b.Config(name)
	if err != nil {
		klog.Fatal(err)
	}
	return clientConfig
}

func (b SAControllerClientBuilder) Client(name string) (clientset.Interface, error) {
	clientConfig, err := b.Config(name)
	if err != nil {
		return nil, err
	}
	return clientset.NewForConfig(clientConfig)
}

func (b SAControllerClientBuilder) ClientOrDie(name string) clientset.Interface {
	client, err := b.Client(name)
	if err != nil {
		klog.Fatal(err)
	}
	return client
}
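// Illustrative sketch (not part of the vendored file): wiring the
// service-account-backed builder. rootConfig and the controller name are
// hypothetical; the clientset helpers used are standard client-go APIs.
//
//	rootClientset := clientset.NewForConfigOrDie(rootConfig)
//	b := SAControllerClientBuilder{
//		ClientConfig:         restclient.AnonymousClientConfig(rootConfig),
//		CoreClient:           rootClientset.CoreV1(),
//		AuthenticationClient: rootClientset.AuthenticationV1(),
//		Namespace:            "kube-system",
//	}
//	// Config blocks for up to 30 seconds waiting for a valid token secret of
//	// the per-controller service account, preferring TokenReview validation
//	// and falling back to probing /apis with the token.
//	cfg, err := b.Config("node-controller")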
vendor/k8s.io/kubernetes/pkg/controller/controller_ref_manager.go (generated, vendored, new file, 501 lines)
@@ -0,0 +1,501 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"fmt"
	"sync"

	apps "k8s.io/api/apps/v1"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime/schema"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/klog"
)

type BaseControllerRefManager struct {
	Controller metav1.Object
	Selector   labels.Selector

	canAdoptErr  error
	canAdoptOnce sync.Once
	CanAdoptFunc func() error
}

func (m *BaseControllerRefManager) CanAdopt() error {
	m.canAdoptOnce.Do(func() {
		if m.CanAdoptFunc != nil {
			m.canAdoptErr = m.CanAdoptFunc()
		}
	})
	return m.canAdoptErr
}

// ClaimObject tries to take ownership of an object for this controller.
//
// It will reconcile the following:
//   * Adopt orphans if the match function returns true.
//   * Release owned objects if the match function returns false.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The returned boolean indicates whether you now
// own the object.
//
// No reconciliation will be attempted if the controller is being deleted.
func (m *BaseControllerRefManager) ClaimObject(obj metav1.Object, match func(metav1.Object) bool, adopt, release func(metav1.Object) error) (bool, error) {
	controllerRef := metav1.GetControllerOf(obj)
	if controllerRef != nil {
		if controllerRef.UID != m.Controller.GetUID() {
			// Owned by someone else. Ignore.
			return false, nil
		}
		if match(obj) {
			// We already own it and the selector matches.
			// Return true (successfully claimed) before checking deletion timestamp.
			// We're still allowed to claim things we already own while being deleted
			// because doing so requires taking no actions.
			return true, nil
		}
		// Owned by us but selector doesn't match.
		// Try to release, unless we're being deleted.
		if m.Controller.GetDeletionTimestamp() != nil {
			return false, nil
		}
		if err := release(obj); err != nil {
			// If the object no longer exists, ignore the error.
			if errors.IsNotFound(err) {
				return false, nil
			}
			// Either someone else released it, or there was a transient error.
			// The controller should requeue and try again if it's still stale.
			return false, err
		}
		// Successfully released.
		return false, nil
	}

	// It's an orphan.
	if m.Controller.GetDeletionTimestamp() != nil || !match(obj) {
		// Ignore if we're being deleted or selector doesn't match.
		return false, nil
	}
	if obj.GetDeletionTimestamp() != nil {
		// Ignore if the object is being deleted
		return false, nil
	}
	// Selector matches. Try to adopt.
	if err := adopt(obj); err != nil {
		// If the object no longer exists, ignore the error.
		if errors.IsNotFound(err) {
			return false, nil
		}
		// Either someone else claimed it first, or there was a transient error.
		// The controller should requeue and try again if it's still orphaned.
		return false, err
	}
	// Successfully adopted.
	return true, nil
}
type PodControllerRefManager struct {
	BaseControllerRefManager
	controllerKind schema.GroupVersionKind
	podControl     PodControlInterface
}

// NewPodControllerRefManager returns a PodControllerRefManager that exposes
// methods to manage the controllerRef of pods.
//
// The CanAdopt() function can be used to perform a potentially expensive check
// (such as a live GET from the API server) prior to the first adoption.
// It will only be called (at most once) if an adoption is actually attempted.
// If CanAdopt() returns a non-nil error, all adoptions will fail.
//
// NOTE: Once CanAdopt() is called, it will not be called again by the same
// PodControllerRefManager instance. Create a new instance if it makes
// sense to check CanAdopt() again (e.g. in a different sync pass).
func NewPodControllerRefManager(
	podControl PodControlInterface,
	controller metav1.Object,
	selector labels.Selector,
	controllerKind schema.GroupVersionKind,
	canAdopt func() error,
) *PodControllerRefManager {
	return &PodControllerRefManager{
		BaseControllerRefManager: BaseControllerRefManager{
			Controller:   controller,
			Selector:     selector,
			CanAdoptFunc: canAdopt,
		},
		controllerKind: controllerKind,
		podControl:     podControl,
	}
}

// ClaimPods tries to take ownership of a list of Pods.
//
// It will reconcile the following:
//   * Adopt orphans if the selector matches.
//   * Release owned objects if the selector no longer matches.
//
// Optional: If one or more filters are specified, a Pod will only be claimed if
// all filters return true.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The list of Pods that you now own is returned.
func (m *PodControllerRefManager) ClaimPods(pods []*v1.Pod, filters ...func(*v1.Pod) bool) ([]*v1.Pod, error) {
	var claimed []*v1.Pod
	var errlist []error

	match := func(obj metav1.Object) bool {
		pod := obj.(*v1.Pod)
		// Check selector first so filters only run on potentially matching Pods.
		if !m.Selector.Matches(labels.Set(pod.Labels)) {
			return false
		}
		for _, filter := range filters {
			if !filter(pod) {
				return false
			}
		}
		return true
	}
	adopt := func(obj metav1.Object) error {
		return m.AdoptPod(obj.(*v1.Pod))
	}
	release := func(obj metav1.Object) error {
		return m.ReleasePod(obj.(*v1.Pod))
	}

	for _, pod := range pods {
		ok, err := m.ClaimObject(pod, match, adopt, release)
		if err != nil {
			errlist = append(errlist, err)
			continue
		}
		if ok {
			claimed = append(claimed, pod)
		}
	}
	return claimed, utilerrors.NewAggregate(errlist)
}

// AdoptPod sends a patch to take control of the pod. It returns the error if
// the patching fails.
func (m *PodControllerRefManager) AdoptPod(pod *v1.Pod) error {
	if err := m.CanAdopt(); err != nil {
		return fmt.Errorf("can't adopt Pod %v/%v (%v): %v", pod.Namespace, pod.Name, pod.UID, err)
	}
	// Note that ValidateOwnerReferences() will reject this patch if another
	// OwnerReference exists with controller=true.
	addControllerPatch := fmt.Sprintf(
		`{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`,
		m.controllerKind.GroupVersion(), m.controllerKind.Kind,
		m.Controller.GetName(), m.Controller.GetUID(), pod.UID)
	return m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(addControllerPatch))
}
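// For a concrete picture (values are made up): rendered for a Deployment named
// "web" adopting a pod, the adoption patch built above is the JSON
//
//	{"metadata":{"ownerReferences":[{"apiVersion":"apps/v1","kind":"Deployment",
//	"name":"web","uid":"aaaaaaaa-0000-0000-0000-000000000001","controller":true,
//	"blockOwnerDeletion":true}],"uid":"bbbbbbbb-0000-0000-0000-000000000002"}}
//
// The trailing "uid" is the pod's own UID, which makes the patch conditional:
// if the pod was deleted and recreated under a new UID, the server rejects the
// patch with 422 (Invalid), so a namesake pod is never adopted by accident.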
// ReleasePod sends a patch to free the pod from the control of the controller.
// It returns the error if the patching fails. 404 and 422 errors are ignored.
func (m *PodControllerRefManager) ReleasePod(pod *v1.Pod) error {
	klog.V(2).Infof("patching pod %s_%s to remove its controllerRef to %s/%s:%s",
		pod.Namespace, pod.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName())
	deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), pod.UID)
	err := m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(deleteOwnerRefPatch))
	if err != nil {
		if errors.IsNotFound(err) {
			// If the pod no longer exists, ignore it.
			return nil
		}
		if errors.IsInvalid(err) {
			// Invalid error will be returned in two cases: 1. the pod
			// has no owner reference, 2. the uid of the pod doesn't
			// match, which means the pod is deleted and then recreated.
			// In both cases, the error can be ignored.

			// TODO: If the pod has owner references, but none of them
			// has the owner.UID, server will silently ignore the patch.
			// Investigate why.
			return nil
		}
	}
	return err
}
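// Illustrative sketch (not part of the vendored file): how a ReplicaSet-style
// controller drives ClaimPods during a sync pass. rsc.podControl, rs, allPods,
// and canAdoptFunc are hypothetical placeholders.
//
//	selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
//	if err != nil {
//		return err
//	}
//	cm := NewPodControllerRefManager(
//		rsc.podControl,
//		rs,
//		selector,
//		apps.SchemeGroupVersion.WithKind("ReplicaSet"),
//		canAdoptFunc, // typically built with RecheckDeletionTimestamp, below
//	)
//	claimedPods, err := cm.ClaimPods(allPods)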
// ReplicaSetControllerRefManager is used to manage controllerRef of ReplicaSets.
// Three methods are defined on this object: 1. ClaimReplicaSets, 2. AdoptReplicaSet and
// 3. ReleaseReplicaSet, which are used to classify the ReplicaSets into appropriate
// categories and accordingly adopt or release them. See comments on these functions
// for more details.
type ReplicaSetControllerRefManager struct {
	BaseControllerRefManager
	controllerKind schema.GroupVersionKind
	rsControl      RSControlInterface
}

// NewReplicaSetControllerRefManager returns a ReplicaSetControllerRefManager that exposes
// methods to manage the controllerRef of ReplicaSets.
//
// The CanAdopt() function can be used to perform a potentially expensive check
// (such as a live GET from the API server) prior to the first adoption.
// It will only be called (at most once) if an adoption is actually attempted.
// If CanAdopt() returns a non-nil error, all adoptions will fail.
//
// NOTE: Once CanAdopt() is called, it will not be called again by the same
// ReplicaSetControllerRefManager instance. Create a new instance if it
// makes sense to check CanAdopt() again (e.g. in a different sync pass).
func NewReplicaSetControllerRefManager(
	rsControl RSControlInterface,
	controller metav1.Object,
	selector labels.Selector,
	controllerKind schema.GroupVersionKind,
	canAdopt func() error,
) *ReplicaSetControllerRefManager {
	return &ReplicaSetControllerRefManager{
		BaseControllerRefManager: BaseControllerRefManager{
			Controller:   controller,
			Selector:     selector,
			CanAdoptFunc: canAdopt,
		},
		controllerKind: controllerKind,
		rsControl:      rsControl,
	}
}

// ClaimReplicaSets tries to take ownership of a list of ReplicaSets.
//
// It will reconcile the following:
//   * Adopt orphans if the selector matches.
//   * Release owned objects if the selector no longer matches.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The list of ReplicaSets that you now own is
// returned.
func (m *ReplicaSetControllerRefManager) ClaimReplicaSets(sets []*apps.ReplicaSet) ([]*apps.ReplicaSet, error) {
	var claimed []*apps.ReplicaSet
	var errlist []error

	match := func(obj metav1.Object) bool {
		return m.Selector.Matches(labels.Set(obj.GetLabels()))
	}
	adopt := func(obj metav1.Object) error {
		return m.AdoptReplicaSet(obj.(*apps.ReplicaSet))
	}
	release := func(obj metav1.Object) error {
		return m.ReleaseReplicaSet(obj.(*apps.ReplicaSet))
	}

	for _, rs := range sets {
		ok, err := m.ClaimObject(rs, match, adopt, release)
		if err != nil {
			errlist = append(errlist, err)
			continue
		}
		if ok {
			claimed = append(claimed, rs)
		}
	}
	return claimed, utilerrors.NewAggregate(errlist)
}

// AdoptReplicaSet sends a patch to take control of the ReplicaSet. It returns
// the error if the patching fails.
func (m *ReplicaSetControllerRefManager) AdoptReplicaSet(rs *apps.ReplicaSet) error {
	if err := m.CanAdopt(); err != nil {
		return fmt.Errorf("can't adopt ReplicaSet %v/%v (%v): %v", rs.Namespace, rs.Name, rs.UID, err)
	}
	// Note that ValidateOwnerReferences() will reject this patch if another
	// OwnerReference exists with controller=true.
	addControllerPatch := fmt.Sprintf(
		`{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`,
		m.controllerKind.GroupVersion(), m.controllerKind.Kind,
		m.Controller.GetName(), m.Controller.GetUID(), rs.UID)
	return m.rsControl.PatchReplicaSet(rs.Namespace, rs.Name, []byte(addControllerPatch))
}

// ReleaseReplicaSet sends a patch to free the ReplicaSet from the control of the Deployment controller.
// It returns the error if the patching fails. 404 and 422 errors are ignored.
func (m *ReplicaSetControllerRefManager) ReleaseReplicaSet(replicaSet *apps.ReplicaSet) error {
	klog.V(2).Infof("patching ReplicaSet %s_%s to remove its controllerRef to %s/%s:%s",
		replicaSet.Namespace, replicaSet.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName())
	deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), replicaSet.UID)
	err := m.rsControl.PatchReplicaSet(replicaSet.Namespace, replicaSet.Name, []byte(deleteOwnerRefPatch))
	if err != nil {
		if errors.IsNotFound(err) {
			// If the ReplicaSet no longer exists, ignore it.
			return nil
		}
		if errors.IsInvalid(err) {
			// Invalid error will be returned in two cases: 1. the ReplicaSet
			// has no owner reference, 2. the uid of the ReplicaSet doesn't
			// match, which means the ReplicaSet is deleted and then recreated.
			// In both cases, the error can be ignored.
			return nil
		}
	}
	return err
}

// RecheckDeletionTimestamp returns a CanAdopt() function to recheck deletion.
//
// The CanAdopt() function calls getObject() to fetch the latest value,
// and denies adoption attempts if that object has a non-nil DeletionTimestamp.
func RecheckDeletionTimestamp(getObject func() (metav1.Object, error)) func() error {
	return func() error {
		obj, err := getObject()
		if err != nil {
			return fmt.Errorf("can't recheck DeletionTimestamp: %v", err)
		}
		if obj.GetDeletionTimestamp() != nil {
			return fmt.Errorf("%v/%v has just been deleted at %v", obj.GetNamespace(), obj.GetName(), obj.GetDeletionTimestamp())
		}
		return nil
	}
}
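// Illustrative sketch (not part of the vendored file): plugging
// RecheckDeletionTimestamp in as the canAdopt hook. client, d, selector,
// rsControl, and rsList are hypothetical placeholders.
//
//	canAdoptFunc := RecheckDeletionTimestamp(func() (metav1.Object, error) {
//		// A live GET guards against adopting on behalf of a controller
//		// deleted after the informer cache was last synced.
//		fresh, err := client.AppsV1().Deployments(d.Namespace).Get(d.Name, metav1.GetOptions{})
//		if err != nil {
//			return nil, err
//		}
//		return fresh, nil
//	})
//	m := NewReplicaSetControllerRefManager(rsControl, d, selector,
//		apps.SchemeGroupVersion.WithKind("Deployment"), canAdoptFunc)
//	claimed, err := m.ClaimReplicaSets(rsList)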
// ControllerRevisionControllerRefManager is used to manage controllerRef of ControllerRevisions.
// Three methods are defined on this object: 1. ClaimControllerRevisions, 2. AdoptControllerRevision and
// 3. ReleaseControllerRevision, which are used to classify the ControllerRevisions into appropriate
// categories and accordingly adopt or release them. See comments on these functions
// for more details.
type ControllerRevisionControllerRefManager struct {
	BaseControllerRefManager
	controllerKind schema.GroupVersionKind
	crControl      ControllerRevisionControlInterface
}

// NewControllerRevisionControllerRefManager returns a ControllerRevisionControllerRefManager that exposes
// methods to manage the controllerRef of ControllerRevisions.
//
// The canAdopt() function can be used to perform a potentially expensive check
// (such as a live GET from the API server) prior to the first adoption.
// It will only be called (at most once) if an adoption is actually attempted.
// If canAdopt() returns a non-nil error, all adoptions will fail.
//
// NOTE: Once canAdopt() is called, it will not be called again by the same
// ControllerRevisionControllerRefManager instance. Create a new instance if it
// makes sense to check canAdopt() again (e.g. in a different sync pass).
func NewControllerRevisionControllerRefManager(
	crControl ControllerRevisionControlInterface,
	controller metav1.Object,
	selector labels.Selector,
	controllerKind schema.GroupVersionKind,
	canAdopt func() error,
) *ControllerRevisionControllerRefManager {
	return &ControllerRevisionControllerRefManager{
		BaseControllerRefManager: BaseControllerRefManager{
			Controller:   controller,
			Selector:     selector,
			CanAdoptFunc: canAdopt,
		},
		controllerKind: controllerKind,
		crControl:      crControl,
	}
}

// ClaimControllerRevisions tries to take ownership of a list of ControllerRevisions.
//
// It will reconcile the following:
//   * Adopt orphans if the selector matches.
//   * Release owned objects if the selector no longer matches.
//
// A non-nil error is returned if some form of reconciliation was attempted and
// failed. Usually, controllers should try again later in case reconciliation
// is still needed.
//
// If the error is nil, either the reconciliation succeeded, or no
// reconciliation was necessary. The list of ControllerRevisions that you now own is
// returned.
func (m *ControllerRevisionControllerRefManager) ClaimControllerRevisions(histories []*apps.ControllerRevision) ([]*apps.ControllerRevision, error) {
	var claimed []*apps.ControllerRevision
	var errlist []error

	match := func(obj metav1.Object) bool {
		return m.Selector.Matches(labels.Set(obj.GetLabels()))
	}
	adopt := func(obj metav1.Object) error {
		return m.AdoptControllerRevision(obj.(*apps.ControllerRevision))
	}
	release := func(obj metav1.Object) error {
		return m.ReleaseControllerRevision(obj.(*apps.ControllerRevision))
	}

	for _, h := range histories {
		ok, err := m.ClaimObject(h, match, adopt, release)
		if err != nil {
			errlist = append(errlist, err)
			continue
		}
		if ok {
			claimed = append(claimed, h)
		}
	}
	return claimed, utilerrors.NewAggregate(errlist)
}

// AdoptControllerRevision sends a patch to take control of the ControllerRevision. It returns the error if
// the patching fails.
func (m *ControllerRevisionControllerRefManager) AdoptControllerRevision(history *apps.ControllerRevision) error {
	if err := m.CanAdopt(); err != nil {
		return fmt.Errorf("can't adopt ControllerRevision %v/%v (%v): %v", history.Namespace, history.Name, history.UID, err)
	}
	// Note that ValidateOwnerReferences() will reject this patch if another
	// OwnerReference exists with controller=true.
	addControllerPatch := fmt.Sprintf(
		`{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`,
		m.controllerKind.GroupVersion(), m.controllerKind.Kind,
		m.Controller.GetName(), m.Controller.GetUID(), history.UID)
	return m.crControl.PatchControllerRevision(history.Namespace, history.Name, []byte(addControllerPatch))
}

// ReleaseControllerRevision sends a patch to free the ControllerRevision from the control of its controller.
// It returns the error if the patching fails. 404 and 422 errors are ignored.
func (m *ControllerRevisionControllerRefManager) ReleaseControllerRevision(history *apps.ControllerRevision) error {
	klog.V(2).Infof("patching ControllerRevision %s_%s to remove its controllerRef to %s/%s:%s",
		history.Namespace, history.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName())
	deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), history.UID)
	err := m.crControl.PatchControllerRevision(history.Namespace, history.Name, []byte(deleteOwnerRefPatch))
	if err != nil {
		if errors.IsNotFound(err) {
			// If the ControllerRevision no longer exists, ignore it.
			return nil
		}
		if errors.IsInvalid(err) {
			// Invalid error will be returned in two cases: 1. the ControllerRevision
			// has no owner reference, 2. the uid of the ControllerRevision doesn't
			// match, which means the ControllerRevision is deleted and then recreated.
			// In both cases, the error can be ignored.
			return nil
		}
	}
	return err
}
vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go (generated, vendored, new file, 1098 lines)
(diff suppressed because the file is too large)
vendor/k8s.io/kubernetes/pkg/controller/deployment/util/deployment_util.go (generated, vendored, new file, 903 lines)
@@ -0,0 +1,903 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"fmt"
	"math"
	"sort"
	"strconv"
	"strings"
	"time"

	"k8s.io/klog"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	intstrutil "k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/wait"
	appsclient "k8s.io/client-go/kubernetes/typed/apps/v1"
	"k8s.io/kubernetes/pkg/controller"
	labelsutil "k8s.io/kubernetes/pkg/util/labels"
	"k8s.io/utils/integer"
)

const (
	// RevisionAnnotation is the revision annotation of a deployment's replica sets which records its rollout sequence
	RevisionAnnotation = "deployment.kubernetes.io/revision"
	// RevisionHistoryAnnotation maintains the history of all old revisions that a replica set has served for a deployment.
	RevisionHistoryAnnotation = "deployment.kubernetes.io/revision-history"
	// DesiredReplicasAnnotation is the desired replicas for a deployment recorded as an annotation
	// in its replica sets. Helps in separating scaling events from the rollout process and for
	// determining if the new replica set for a deployment is really saturated.
	DesiredReplicasAnnotation = "deployment.kubernetes.io/desired-replicas"
	// MaxReplicasAnnotation is the maximum replicas a deployment can have at a given point, which
	// is deployment.spec.replicas + maxSurge. Used by the underlying replica sets to estimate their
	// proportions in case the deployment has surge replicas.
	MaxReplicasAnnotation = "deployment.kubernetes.io/max-replicas"

	// RollbackRevisionNotFound is the rollback event reason used when the target revision is not found
	RollbackRevisionNotFound = "DeploymentRollbackRevisionNotFound"
	// RollbackTemplateUnchanged is the rollback event reason used when the rollback template is unchanged
	RollbackTemplateUnchanged = "DeploymentRollbackTemplateUnchanged"
	// RollbackDone is the rollback event reason used when a rollback completes
	RollbackDone = "DeploymentRollback"

	// Reasons for deployment conditions
	//
	// Progressing:

	// ReplicaSetUpdatedReason is added in a deployment when one of its replica sets is updated as part
	// of the rollout process.
	ReplicaSetUpdatedReason = "ReplicaSetUpdated"
	// FailedRSCreateReason is added in a deployment when it cannot create a new replica set.
	FailedRSCreateReason = "ReplicaSetCreateError"
	// NewReplicaSetReason is added in a deployment when it creates a new replica set.
	NewReplicaSetReason = "NewReplicaSetCreated"
	// FoundNewRSReason is added in a deployment when it adopts an existing replica set.
	FoundNewRSReason = "FoundNewReplicaSet"
	// NewRSAvailableReason is added in a deployment when its newest replica set is made available,
	// i.e. the number of new pods that have passed readiness checks and run for at least minReadySeconds
	// is at least the minimum available pods that need to run for the deployment.
	NewRSAvailableReason = "NewReplicaSetAvailable"
	// TimedOutReason is added in a deployment when its newest replica set fails to show any progress
	// within the given deadline (progressDeadlineSeconds).
	TimedOutReason = "ProgressDeadlineExceeded"
	// PausedDeployReason is added in a deployment when it is paused. Lack of progress shouldn't be
	// estimated once a deployment is paused.
	PausedDeployReason = "DeploymentPaused"
	// ResumedDeployReason is added in a deployment when it is resumed. Useful for not accidentally
	// failing deployments that are paused amidst a rollout and are bounded by a deadline.
	ResumedDeployReason = "DeploymentResumed"
	//
	// Available:

	// MinimumReplicasAvailable is added in a deployment when it has its minimum replicas required available.
	MinimumReplicasAvailable = "MinimumReplicasAvailable"
	// MinimumReplicasUnavailable is added in a deployment when it doesn't have the minimum required replicas
	// available.
	MinimumReplicasUnavailable = "MinimumReplicasUnavailable"
)
// NewDeploymentCondition creates a new deployment condition.
func NewDeploymentCondition(condType apps.DeploymentConditionType, status v1.ConditionStatus, reason, message string) *apps.DeploymentCondition {
	return &apps.DeploymentCondition{
		Type:               condType,
		Status:             status,
		LastUpdateTime:     metav1.Now(),
		LastTransitionTime: metav1.Now(),
		Reason:             reason,
		Message:            message,
	}
}

// GetDeploymentCondition returns the condition with the provided type.
func GetDeploymentCondition(status apps.DeploymentStatus, condType apps.DeploymentConditionType) *apps.DeploymentCondition {
	for i := range status.Conditions {
		c := status.Conditions[i]
		if c.Type == condType {
			return &c
		}
	}
	return nil
}

// SetDeploymentCondition updates the deployment to include the provided condition. If the condition that
// we are about to add already exists and has the same status and reason then we are not going to update.
func SetDeploymentCondition(status *apps.DeploymentStatus, condition apps.DeploymentCondition) {
	currentCond := GetDeploymentCondition(*status, condition.Type)
	if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
		return
	}
	// Do not update lastTransitionTime if the status of the condition doesn't change.
	if currentCond != nil && currentCond.Status == condition.Status {
		condition.LastTransitionTime = currentCond.LastTransitionTime
	}
	newConditions := filterOutCondition(status.Conditions, condition.Type)
	status.Conditions = append(newConditions, condition)
}

// RemoveDeploymentCondition removes the deployment condition with the provided type.
func RemoveDeploymentCondition(status *apps.DeploymentStatus, condType apps.DeploymentConditionType) {
	status.Conditions = filterOutCondition(status.Conditions, condType)
}

// filterOutCondition returns a new slice of deployment conditions without conditions with the provided type.
func filterOutCondition(conditions []apps.DeploymentCondition, condType apps.DeploymentConditionType) []apps.DeploymentCondition {
	var newConditions []apps.DeploymentCondition
	for _, c := range conditions {
		if c.Type == condType {
			continue
		}
		newConditions = append(newConditions, c)
	}
	return newConditions
}
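// Illustrative sketch (not part of the vendored file): using the condition
// helpers from a caller that imports this package as deploymentutil. The
// deployment d and the message string are hypothetical.
//
//	cond := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue,
//		deploymentutil.NewRSAvailableReason, "ReplicaSet \"web-6d4cf56db6\" has successfully progressed.")
//	deploymentutil.SetDeploymentCondition(&d.Status, *cond)
//	// Re-applying a condition with the same status and reason is a no-op,
//	// so repeated syncs do not churn lastUpdateTime on the Deployment.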
// ReplicaSetToDeploymentCondition converts a replica set condition into a deployment condition.
// Useful for promoting replica set failure conditions into deployments.
func ReplicaSetToDeploymentCondition(cond apps.ReplicaSetCondition) apps.DeploymentCondition {
	return apps.DeploymentCondition{
		Type:               apps.DeploymentConditionType(cond.Type),
		Status:             cond.Status,
		LastTransitionTime: cond.LastTransitionTime,
		LastUpdateTime:     cond.LastTransitionTime,
		Reason:             cond.Reason,
		Message:            cond.Message,
	}
}

// SetDeploymentRevision updates the revision for a deployment.
func SetDeploymentRevision(deployment *apps.Deployment, revision string) bool {
	updated := false

	if deployment.Annotations == nil {
		deployment.Annotations = make(map[string]string)
	}
	if deployment.Annotations[RevisionAnnotation] != revision {
		deployment.Annotations[RevisionAnnotation] = revision
		updated = true
	}

	return updated
}

// MaxRevision finds the highest revision in the replica sets
func MaxRevision(allRSs []*apps.ReplicaSet) int64 {
	max := int64(0)
	for _, rs := range allRSs {
		if v, err := Revision(rs); err != nil {
			// Skip replica sets whose revision information cannot be parsed
			klog.V(4).Infof("Error: %v. Couldn't parse revision for replica set %#v, deployment controller will skip it when reconciling revisions.", err, rs)
		} else if v > max {
			max = v
		}
	}
	return max
}

// LastRevision finds the second max revision number in all replica sets (the last revision)
func LastRevision(allRSs []*apps.ReplicaSet) int64 {
	max, secMax := int64(0), int64(0)
	for _, rs := range allRSs {
		if v, err := Revision(rs); err != nil {
			// Skip replica sets whose revision information cannot be parsed
			klog.V(4).Infof("Error: %v. Couldn't parse revision for replica set %#v, deployment controller will skip it when reconciling revisions.", err, rs)
		} else if v >= max {
			secMax = max
			max = v
		} else if v > secMax {
			secMax = v
		}
	}
	return secMax
}

// Revision returns the revision number of the input object.
func Revision(obj runtime.Object) (int64, error) {
	acc, err := meta.Accessor(obj)
	if err != nil {
		return 0, err
	}
	v, ok := acc.GetAnnotations()[RevisionAnnotation]
	if !ok {
		return 0, nil
	}
	return strconv.ParseInt(v, 10, 64)
}
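// Worked example: for replica sets annotated with revisions 3, 5, and 4,
// MaxRevision returns 5 (the current rollout target) and LastRevision returns
// 4 (the candidate a rollback would go to).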
// SetNewReplicaSetAnnotations sets new replica set's annotations appropriately by updating its revision and
// copying required deployment annotations to it; it returns true if replica set's annotation is changed.
func SetNewReplicaSetAnnotations(deployment *apps.Deployment, newRS *apps.ReplicaSet, newRevision string, exists bool) bool {
	// First, copy deployment's annotations (except for apply and revision annotations)
	annotationChanged := copyDeploymentAnnotationsToReplicaSet(deployment, newRS)
	// Then, update replica set's revision annotation
	if newRS.Annotations == nil {
		newRS.Annotations = make(map[string]string)
	}
	oldRevision, ok := newRS.Annotations[RevisionAnnotation]
	// The newRS's revision should be the greatest among all RSes. Usually, its revision number is newRevision (the max revision number
	// of all old RSes + 1). However, it's possible that some of the old RSes are deleted after the newRS revision being updated, and
	// newRevision becomes smaller than newRS's revision. We should only update newRS revision when it's smaller than newRevision.

	oldRevisionInt, err := strconv.ParseInt(oldRevision, 10, 64)
	if err != nil {
		if oldRevision != "" {
			klog.Warningf("Updating replica set revision OldRevision not int %s", err)
			return false
		}
		// If the RS annotation is empty then initialise it to 0
		oldRevisionInt = 0
	}
	newRevisionInt, err := strconv.ParseInt(newRevision, 10, 64)
	if err != nil {
		klog.Warningf("Updating replica set revision NewRevision not int %s", err)
		return false
	}
	if oldRevisionInt < newRevisionInt {
		newRS.Annotations[RevisionAnnotation] = newRevision
		annotationChanged = true
		klog.V(4).Infof("Updating replica set %q revision to %s", newRS.Name, newRevision)
	}
	// If a revision annotation already existed and this replica set was updated with a new revision
	// then that means we are rolling back to this replica set. We need to preserve the old revisions
	// for historical information.
	if ok && annotationChanged {
		revisionHistoryAnnotation := newRS.Annotations[RevisionHistoryAnnotation]
		oldRevisions := strings.Split(revisionHistoryAnnotation, ",")
		if len(oldRevisions[0]) == 0 {
			newRS.Annotations[RevisionHistoryAnnotation] = oldRevision
		} else {
			oldRevisions = append(oldRevisions, oldRevision)
			newRS.Annotations[RevisionHistoryAnnotation] = strings.Join(oldRevisions, ",")
		}
	}
	// If the new replica set is about to be created, we need to add replica annotations to it.
	if !exists && SetReplicasAnnotations(newRS, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+MaxSurge(*deployment)) {
		annotationChanged = true
	}
	return annotationChanged
}

var annotationsToSkip = map[string]bool{
	v1.LastAppliedConfigAnnotation: true,
	RevisionAnnotation:             true,
	RevisionHistoryAnnotation:      true,
	DesiredReplicasAnnotation:      true,
	MaxReplicasAnnotation:          true,
	apps.DeprecatedRollbackTo:      true,
}

// skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key
// TODO: How to decide which annotations should / should not be copied?
//       See https://github.com/kubernetes/kubernetes/pull/20035#issuecomment-179558615
func skipCopyAnnotation(key string) bool {
	return annotationsToSkip[key]
}

// copyDeploymentAnnotationsToReplicaSet copies deployment's annotations to replica set's annotations,
// and returns true if replica set's annotation is changed.
// Note that apply and revision annotations are not copied.
func copyDeploymentAnnotationsToReplicaSet(deployment *apps.Deployment, rs *apps.ReplicaSet) bool {
	rsAnnotationsChanged := false
	if rs.Annotations == nil {
		rs.Annotations = make(map[string]string)
	}
	for k, v := range deployment.Annotations {
		// newRS revision is updated automatically in getNewReplicaSet, and the deployment's revision number is then updated
		// by copying its newRS revision number. We should not copy deployment's revision to its newRS, since the update of
		// deployment revision number may fail (revision becomes stale) and the revision number in newRS is more reliable.
		if skipCopyAnnotation(k) || rs.Annotations[k] == v {
			continue
		}
		rs.Annotations[k] = v
		rsAnnotationsChanged = true
	}
	return rsAnnotationsChanged
}
// SetDeploymentAnnotationsTo sets deployment's annotations as given RS's annotations.
// This action should be done if and only if the deployment is rolling back to this rs.
// Note that apply and revision annotations are not changed.
func SetDeploymentAnnotationsTo(deployment *apps.Deployment, rollbackToRS *apps.ReplicaSet) {
	deployment.Annotations = getSkippedAnnotations(deployment.Annotations)
	for k, v := range rollbackToRS.Annotations {
		if !skipCopyAnnotation(k) {
			deployment.Annotations[k] = v
		}
	}
}

func getSkippedAnnotations(annotations map[string]string) map[string]string {
	skippedAnnotations := make(map[string]string)
	for k, v := range annotations {
		if skipCopyAnnotation(k) {
			skippedAnnotations[k] = v
		}
	}
	return skippedAnnotations
}

// FindActiveOrLatest returns the only active or the latest replica set in case there is at most one active
// replica set. If there are more active replica sets, then we should proportionally scale them.
func FindActiveOrLatest(newRS *apps.ReplicaSet, oldRSs []*apps.ReplicaSet) *apps.ReplicaSet {
	if newRS == nil && len(oldRSs) == 0 {
		return nil
	}

	sort.Sort(sort.Reverse(controller.ReplicaSetsByCreationTimestamp(oldRSs)))
	allRSs := controller.FilterActiveReplicaSets(append(oldRSs, newRS))

	switch len(allRSs) {
	case 0:
		// If there is no active replica set then we should return the newest.
		if newRS != nil {
			return newRS
		}
		return oldRSs[0]
	case 1:
		return allRSs[0]
	default:
		return nil
	}
}

// GetDesiredReplicasAnnotation returns the number of desired replicas
func GetDesiredReplicasAnnotation(rs *apps.ReplicaSet) (int32, bool) {
	return getIntFromAnnotation(rs, DesiredReplicasAnnotation)
}

func getMaxReplicasAnnotation(rs *apps.ReplicaSet) (int32, bool) {
	return getIntFromAnnotation(rs, MaxReplicasAnnotation)
}

func getIntFromAnnotation(rs *apps.ReplicaSet, annotationKey string) (int32, bool) {
	annotationValue, ok := rs.Annotations[annotationKey]
	if !ok {
		return int32(0), false
	}
	intValue, err := strconv.Atoi(annotationValue)
	if err != nil {
		klog.V(2).Infof("Cannot convert the value %q with annotation key %q for the replica set %q", annotationValue, annotationKey, rs.Name)
		return int32(0), false
	}
	return int32(intValue), true
}

// SetReplicasAnnotations sets the desiredReplicas and maxReplicas into the annotations
func SetReplicasAnnotations(rs *apps.ReplicaSet, desiredReplicas, maxReplicas int32) bool {
	updated := false
	if rs.Annotations == nil {
		rs.Annotations = make(map[string]string)
	}
	desiredString := fmt.Sprintf("%d", desiredReplicas)
	if hasString := rs.Annotations[DesiredReplicasAnnotation]; hasString != desiredString {
		rs.Annotations[DesiredReplicasAnnotation] = desiredString
		updated = true
	}
	maxString := fmt.Sprintf("%d", maxReplicas)
	if hasString := rs.Annotations[MaxReplicasAnnotation]; hasString != maxString {
		rs.Annotations[MaxReplicasAnnotation] = maxString
		updated = true
	}
	return updated
}

// ReplicasAnnotationsNeedUpdate returns true if ReplicasAnnotations need to be updated
func ReplicasAnnotationsNeedUpdate(rs *apps.ReplicaSet, desiredReplicas, maxReplicas int32) bool {
	if rs.Annotations == nil {
		return true
	}
	desiredString := fmt.Sprintf("%d", desiredReplicas)
	if hasString := rs.Annotations[DesiredReplicasAnnotation]; hasString != desiredString {
		return true
	}
	maxString := fmt.Sprintf("%d", maxReplicas)
	if hasString := rs.Annotations[MaxReplicasAnnotation]; hasString != maxString {
		return true
	}
	return false
}

// MaxUnavailable returns the maximum unavailable pods a rolling deployment can take.
func MaxUnavailable(deployment apps.Deployment) int32 {
	if !IsRollingUpdate(&deployment) || *(deployment.Spec.Replicas) == 0 {
		return int32(0)
	}
	// Error caught by validation
	_, maxUnavailable, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas))
	if maxUnavailable > *deployment.Spec.Replicas {
		return *deployment.Spec.Replicas
	}
	return maxUnavailable
}

// MinAvailable returns the minimum available pods of a given deployment
func MinAvailable(deployment *apps.Deployment) int32 {
	if !IsRollingUpdate(deployment) {
		return int32(0)
	}
	return *(deployment.Spec.Replicas) - MaxUnavailable(*deployment)
}

// MaxSurge returns the maximum surge pods a rolling deployment can take.
func MaxSurge(deployment apps.Deployment) int32 {
	if !IsRollingUpdate(&deployment) {
		return int32(0)
	}
	// Error caught by validation
	maxSurge, _, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas))
	return maxSurge
}
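// ResolveFenceposts is defined later in this file (beyond this diff excerpt);
// its rounding can be sketched with the intstr helpers it wraps: maxSurge
// rounds up, maxUnavailable rounds down, and if both resolve to zero,
// maxUnavailable is forced to 1 so rollouts can still make progress.
//
//	surge, _ := intstrutil.GetValueFromIntOrPercent(intstrutil.ValueOrDefault(maxSurge, intstrutil.FromInt(0)), int(desired), true)
//	unavailable, _ := intstrutil.GetValueFromIntOrPercent(intstrutil.ValueOrDefault(maxUnavailable, intstrutil.FromInt(0)), int(desired), false)
//	// e.g. desired=10, maxSurge="25%", maxUnavailable="25%"
//	// => surge=3 (ceil of 2.5), unavailable=2 (floor of 2.5)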
// GetProportion will estimate the proportion for the provided replica set using 1. the current size
// of the parent deployment, 2. the replica count that needs to be added on the replica sets of the
// deployment, and 3. the total replicas added in the replica sets of the deployment so far.
func GetProportion(rs *apps.ReplicaSet, d apps.Deployment, deploymentReplicasToAdd, deploymentReplicasAdded int32) int32 {
	if rs == nil || *(rs.Spec.Replicas) == 0 || deploymentReplicasToAdd == 0 || deploymentReplicasToAdd == deploymentReplicasAdded {
		return int32(0)
	}

	rsFraction := getReplicaSetFraction(*rs, d)
	allowed := deploymentReplicasToAdd - deploymentReplicasAdded

	if deploymentReplicasToAdd > 0 {
		// Use the minimum between the replica set fraction and the maximum allowed replicas
		// when scaling up. This way we ensure we will not scale up more than the allowed
		// replicas we can add.
		return integer.Int32Min(rsFraction, allowed)
	}
	// Use the maximum between the replica set fraction and the maximum allowed replicas
	// when scaling down. This way we ensure we will not scale down more than the allowed
	// replicas we can remove.
	return integer.Int32Max(rsFraction, allowed)
}

// getReplicaSetFraction estimates the fraction of replicas a replica set can have in
// 1. a scaling event during a rollout or 2. when scaling a paused deployment.
func getReplicaSetFraction(rs apps.ReplicaSet, d apps.Deployment) int32 {
	// If we are scaling down to zero then the fraction of this replica set is its whole size (negative)
	if *(d.Spec.Replicas) == int32(0) {
		return -*(rs.Spec.Replicas)
	}

	deploymentReplicas := *(d.Spec.Replicas) + MaxSurge(d)
	annotatedReplicas, ok := getMaxReplicasAnnotation(&rs)
	if !ok {
		// If we cannot find the annotation then fallback to the current deployment size. Note that this
		// will not be an accurate proportion estimation in case other replica sets have different values
		// which means that the deployment was scaled at some point but we at least will stay in limits
		// due to the min-max comparisons in getProportion.
		annotatedReplicas = d.Status.Replicas
	}

	// We should never proportionally scale up from zero which means rs.spec.replicas and annotatedReplicas
	// will never be zero here.
	newRSsize := (float64(*(rs.Spec.Replicas) * deploymentReplicas)) / float64(annotatedReplicas)
	return integer.RoundToInt32(newRSsize) - *(rs.Spec.Replicas)
}
|
||||
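// A worked example of the proportional math above (illustrative numbers, not
// taken from any particular cluster): suppose a deployment is being scaled
// from 10 to 15 replicas with maxSurge resolving to 0, so deploymentReplicas
// is 15, and one of its replica sets runs 4 replicas with a max-replicas
// annotation of 10. Then newRSsize = 4 * 15 / 10 = 6.0, and the returned
// fraction is RoundToInt32(6.0) - 4 = +2: this replica set receives 2 of the
// 5 added replicas, in proportion to the 40% of the deployment it owns.
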
// GetAllReplicaSets returns the old and new replica sets targeted by the given Deployment. It gets PodList and ReplicaSetList from the client interface.
// Note that the first set of old replica sets doesn't include the ones with no pods, and the second set of old replica sets includes all old replica sets.
// The third returned value is the new replica set, and it may be nil if it doesn't exist yet.
func GetAllReplicaSets(deployment *apps.Deployment, c appsclient.AppsV1Interface) ([]*apps.ReplicaSet, []*apps.ReplicaSet, *apps.ReplicaSet, error) {
	rsList, err := ListReplicaSets(deployment, RsListFromClient(c))
	if err != nil {
		return nil, nil, nil, err
	}
	oldRSes, allOldRSes := FindOldReplicaSets(deployment, rsList)
	newRS := FindNewReplicaSet(deployment, rsList)
	return oldRSes, allOldRSes, newRS, nil
}

// GetOldReplicaSets returns the old replica sets targeted by the given Deployment; it gets PodList and ReplicaSetList from the client interface.
// Note that the first set of old replica sets doesn't include the ones with no pods, and the second set of old replica sets includes all old replica sets.
func GetOldReplicaSets(deployment *apps.Deployment, c appsclient.AppsV1Interface) ([]*apps.ReplicaSet, []*apps.ReplicaSet, error) {
	rsList, err := ListReplicaSets(deployment, RsListFromClient(c))
	if err != nil {
		return nil, nil, err
	}
	oldRSes, allOldRSes := FindOldReplicaSets(deployment, rsList)
	return oldRSes, allOldRSes, nil
}

// GetNewReplicaSet returns a replica set that matches the intent of the given deployment; it gets ReplicaSetList from the client interface.
// Returns nil if the new replica set doesn't exist yet.
func GetNewReplicaSet(deployment *apps.Deployment, c appsclient.AppsV1Interface) (*apps.ReplicaSet, error) {
	rsList, err := ListReplicaSets(deployment, RsListFromClient(c))
	if err != nil {
		return nil, err
	}
	return FindNewReplicaSet(deployment, rsList), nil
}

// RsListFromClient returns an rsListFunc that wraps the given client.
func RsListFromClient(c appsclient.AppsV1Interface) RsListFunc {
	return func(namespace string, options metav1.ListOptions) ([]*apps.ReplicaSet, error) {
		rsList, err := c.ReplicaSets(namespace).List(options)
		if err != nil {
			return nil, err
		}
		var ret []*apps.ReplicaSet
		for i := range rsList.Items {
			ret = append(ret, &rsList.Items[i])
		}
		return ret, err
	}
}

// TODO: switch RsListFunc and podListFunc to full namespacers

// RsListFunc returns the ReplicaSet from the ReplicaSet namespace and the List metav1.ListOptions.
type RsListFunc func(string, metav1.ListOptions) ([]*apps.ReplicaSet, error)

// podListFunc returns the PodList from the Pod namespace and the List metav1.ListOptions.
type podListFunc func(string, metav1.ListOptions) (*v1.PodList, error)

// ListReplicaSets returns a slice of RSes the given deployment targets.
// Note that this does NOT attempt to reconcile ControllerRef (adopt/orphan),
// because only the controller itself should do that.
// However, it does filter out anything whose ControllerRef doesn't match.
func ListReplicaSets(deployment *apps.Deployment, getRSList RsListFunc) ([]*apps.ReplicaSet, error) {
	// TODO: Right now we list replica sets by their labels. We should list them by selector, i.e. the replica set's selector
	// should be a superset of the deployment's selector, see https://github.com/kubernetes/kubernetes/issues/19830.
	namespace := deployment.Namespace
	selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector)
	if err != nil {
		return nil, err
	}
	options := metav1.ListOptions{LabelSelector: selector.String()}
	all, err := getRSList(namespace, options)
	if err != nil {
		return nil, err
	}
	// Only include those whose ControllerRef matches the Deployment.
	owned := make([]*apps.ReplicaSet, 0, len(all))
	for _, rs := range all {
		if metav1.IsControlledBy(rs, deployment) {
			owned = append(owned, rs)
		}
	}
	return owned, nil
}

// ListPods returns a list of pods the given deployment targets.
// This needs a list of ReplicaSets for the Deployment,
// which can be found with ListReplicaSets().
// Note that this does NOT attempt to reconcile ControllerRef (adopt/orphan),
// because only the controller itself should do that.
// However, it does filter out anything whose ControllerRef doesn't match.
func ListPods(deployment *apps.Deployment, rsList []*apps.ReplicaSet, getPodList podListFunc) (*v1.PodList, error) {
	namespace := deployment.Namespace
	selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector)
	if err != nil {
		return nil, err
	}
	options := metav1.ListOptions{LabelSelector: selector.String()}
	all, err := getPodList(namespace, options)
	if err != nil {
		return all, err
	}
	// Only include those whose ControllerRef points to a ReplicaSet that is in
	// turn owned by this Deployment.
	rsMap := make(map[types.UID]bool, len(rsList))
	for _, rs := range rsList {
		rsMap[rs.UID] = true
	}
	owned := &v1.PodList{Items: make([]v1.Pod, 0, len(all.Items))}
	for i := range all.Items {
		pod := &all.Items[i]
		controllerRef := metav1.GetControllerOf(pod)
		if controllerRef != nil && rsMap[controllerRef.UID] {
			owned.Items = append(owned.Items, *pod)
		}
	}
	return owned, nil
}

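// A minimal usage sketch for the two helpers above, assuming a hypothetical
// typed clientset "client" and a *apps.Deployment "d" obtained elsewhere
// (neither name comes from this file):
//
//	rsList, err := ListReplicaSets(d, RsListFromClient(client.AppsV1()))
//	if err != nil {
//		// handle the list error
//	}
//	podList, err := ListPods(d, rsList, func(ns string, opts metav1.ListOptions) (*v1.PodList, error) {
//		return client.CoreV1().Pods(ns).List(opts)
//	})
//
// The closure adapts the core client to the unexported podListFunc signature,
// mirroring what RsListFromClient does for replica sets.
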
// EqualIgnoreHash returns true if two given podTemplateSpecs are equal, ignoring the diff in value of Labels[pod-template-hash].
// We ignore pod-template-hash because:
// 1. The hash result would be different upon podTemplateSpec API changes
//    (e.g. the addition of a new field will cause the hash code to change)
// 2. The deployment template won't have hash labels
func EqualIgnoreHash(template1, template2 *v1.PodTemplateSpec) bool {
	t1Copy := template1.DeepCopy()
	t2Copy := template2.DeepCopy()
	// Remove hash labels from template.Labels before comparing
	delete(t1Copy.Labels, apps.DefaultDeploymentUniqueLabelKey)
	delete(t2Copy.Labels, apps.DefaultDeploymentUniqueLabelKey)
	return apiequality.Semantic.DeepEqual(t1Copy, t2Copy)
}

// FindNewReplicaSet returns the new RS this given deployment targets (the one with the same pod template).
func FindNewReplicaSet(deployment *apps.Deployment, rsList []*apps.ReplicaSet) *apps.ReplicaSet {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(rsList))
	for i := range rsList {
		if EqualIgnoreHash(&rsList[i].Spec.Template, &deployment.Spec.Template) {
			// In rare cases, such as after cluster upgrades, a Deployment may end up with
			// more than one new ReplicaSet that has the same template as its template,
			// see https://github.com/kubernetes/kubernetes/issues/40415
			// We deterministically choose the oldest new ReplicaSet.
			return rsList[i]
		}
	}
	// new ReplicaSet does not exist.
	return nil
}

// FindOldReplicaSets returns the old replica sets targeted by the given Deployment, with the given slice of RSes.
// Note that the first set of old replica sets doesn't include the ones with no pods, and the second set of old replica sets includes all old replica sets.
func FindOldReplicaSets(deployment *apps.Deployment, rsList []*apps.ReplicaSet) ([]*apps.ReplicaSet, []*apps.ReplicaSet) {
	var requiredRSs []*apps.ReplicaSet
	var allRSs []*apps.ReplicaSet
	newRS := FindNewReplicaSet(deployment, rsList)
	for _, rs := range rsList {
		// Filter out new replica set
		if newRS != nil && rs.UID == newRS.UID {
			continue
		}
		allRSs = append(allRSs, rs)
		if *(rs.Spec.Replicas) != 0 {
			requiredRSs = append(requiredRSs, rs)
		}
	}
	return requiredRSs, allRSs
}

// SetFromReplicaSetTemplate sets the desired PodTemplateSpec from a replica set template to the given deployment.
func SetFromReplicaSetTemplate(deployment *apps.Deployment, template v1.PodTemplateSpec) *apps.Deployment {
	deployment.Spec.Template.ObjectMeta = template.ObjectMeta
	deployment.Spec.Template.Spec = template.Spec
	deployment.Spec.Template.ObjectMeta.Labels = labelsutil.CloneAndRemoveLabel(
		deployment.Spec.Template.ObjectMeta.Labels,
		apps.DefaultDeploymentUniqueLabelKey)
	return deployment
}

// GetReplicaCountForReplicaSets returns the sum of Replicas of the given replica sets.
func GetReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 {
	totalReplicas := int32(0)
	for _, rs := range replicaSets {
		if rs != nil {
			totalReplicas += *(rs.Spec.Replicas)
		}
	}
	return totalReplicas
}

// GetActualReplicaCountForReplicaSets returns the sum of actual replicas of the given replica sets.
func GetActualReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 {
	totalActualReplicas := int32(0)
	for _, rs := range replicaSets {
		if rs != nil {
			totalActualReplicas += rs.Status.Replicas
		}
	}
	return totalActualReplicas
}

// GetReadyReplicaCountForReplicaSets returns the number of ready pods corresponding to the given replica sets.
func GetReadyReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 {
	totalReadyReplicas := int32(0)
	for _, rs := range replicaSets {
		if rs != nil {
			totalReadyReplicas += rs.Status.ReadyReplicas
		}
	}
	return totalReadyReplicas
}

// GetAvailableReplicaCountForReplicaSets returns the number of available pods corresponding to the given replica sets.
func GetAvailableReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 {
	totalAvailableReplicas := int32(0)
	for _, rs := range replicaSets {
		if rs != nil {
			totalAvailableReplicas += rs.Status.AvailableReplicas
		}
	}
	return totalAvailableReplicas
}

// IsRollingUpdate returns true if the strategy type is a rolling update.
func IsRollingUpdate(deployment *apps.Deployment) bool {
	return deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType
}

// DeploymentComplete considers a deployment to be complete once all of its desired replicas
// are updated and available, and no old pods are running.
func DeploymentComplete(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool {
	return newStatus.UpdatedReplicas == *(deployment.Spec.Replicas) &&
		newStatus.Replicas == *(deployment.Spec.Replicas) &&
		newStatus.AvailableReplicas == *(deployment.Spec.Replicas) &&
		newStatus.ObservedGeneration >= deployment.Generation
}

// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the
// current with the new status of the deployment that the controller is observing. More specifically,
// when new pods are scaled up or become ready or available, or old pods are scaled down, then we
// consider the deployment to be progressing.
func DeploymentProgressing(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool {
	oldStatus := deployment.Status

	// Old replicas that need to be scaled down
	oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas
	newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas

	return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) ||
		(newStatusOldReplicas < oldStatusOldReplicas) ||
		newStatus.ReadyReplicas > deployment.Status.ReadyReplicas ||
		newStatus.AvailableReplicas > deployment.Status.AvailableReplicas
}

// nowFn is used for unit testing.
var nowFn = func() time.Time { return time.Now() }

// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress
// is older than progressDeadlineSeconds, or if a Progressing condition with a TimedOutReason reason
// already exists.
func DeploymentTimedOut(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool {
	if !HasProgressDeadline(deployment) {
		return false
	}

	// Look for the Progressing condition. If it doesn't exist, we have no base to estimate progress.
	// If it's already set with a TimedOutReason reason, we have already timed out, no need to check
	// again.
	condition := GetDeploymentCondition(*newStatus, apps.DeploymentProgressing)
	if condition == nil {
		return false
	}
	// If the previous condition has been a successful rollout then we shouldn't try to
	// estimate any progress. Scenario:
	//
	// * progressDeadlineSeconds is smaller than the difference between now and the time
	//   the last rollout finished in the past.
	// * the creation of a new ReplicaSet triggers a resync of the Deployment prior to the
	//   cached copy of the Deployment getting updated with the status.condition that indicates
	//   the creation of the new ReplicaSet.
	//
	// The Deployment will be resynced and eventually its Progressing condition will catch
	// up with the state of the world.
	if condition.Reason == NewRSAvailableReason {
		return false
	}
	if condition.Reason == TimedOutReason {
		return true
	}

	// Look at the difference in seconds between now and the last time we reported any
	// progress, tried to create a replica set, or resumed a paused deployment, and
	// compare against progressDeadlineSeconds.
	from := condition.LastUpdateTime
	now := nowFn()
	delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second
	timedOut := from.Add(delta).Before(now)

	klog.V(4).Infof("Deployment %q timed out (%t) [last progress check: %v - now: %v]", deployment.Name, timedOut, from, now)
	return timedOut
}

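// A concrete illustration of the timeout arithmetic above (hypothetical
// timestamps): with progressDeadlineSeconds = 600 and a Progressing condition
// whose LastUpdateTime is 10:00:00, the deployment is reported as timed out by
// the first check that runs after 10:10:00, since from.Add(delta) is then
// before now. Unit tests can pin "now" by overriding the nowFn variable above.
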
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the following is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas
// 2) The max number of pods allowed is reached: deployment's replicas + maxSurge == all RSs' replicas
func NewRSNewReplicas(deployment *apps.Deployment, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet) (int32, error) {
	switch deployment.Spec.Strategy.Type {
	case apps.RollingUpdateDeploymentStrategyType:
		// Check if we can scale up.
		maxSurge, err := intstrutil.GetValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(*(deployment.Spec.Replicas)), true)
		if err != nil {
			return 0, err
		}
		// Find the total number of pods
		currentPodCount := GetReplicaCountForReplicaSets(allRSs)
		maxTotalPods := *(deployment.Spec.Replicas) + int32(maxSurge)
		if currentPodCount >= maxTotalPods {
			// Cannot scale up.
			return *(newRS.Spec.Replicas), nil
		}
		// Scale up.
		scaleUpCount := maxTotalPods - currentPodCount
		// Do not exceed the number of desired replicas.
		scaleUpCount = int32(integer.IntMin(int(scaleUpCount), int(*(deployment.Spec.Replicas)-*(newRS.Spec.Replicas))))
		return *(newRS.Spec.Replicas) + scaleUpCount, nil
	case apps.RecreateDeploymentStrategyType:
		return *(deployment.Spec.Replicas), nil
	default:
		return 0, fmt.Errorf("deployment type %v isn't supported", deployment.Spec.Strategy.Type)
	}
}

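// A worked example of the scale-up arithmetic above (illustrative numbers):
// with 10 desired replicas and a maxSurge of 25%, maxSurge resolves (rounding
// up) to 3, so maxTotalPods = 13. If the old RS still runs 10 pods and the
// new RS runs 0, then currentPodCount = 10 and scaleUpCount = 3, so the new
// RS is sized to 0 + 3 = 3 replicas, never exceeding the surge budget or the
// desired replica count.
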
// IsSaturated checks if the new replica set is saturated by comparing its size with its deployment size.
// Both the deployment and the replica set have to believe this replica set can own all of the desired
// replicas in the deployment, and the annotation helps in achieving that. All pods of the ReplicaSet
// need to be available.
func IsSaturated(deployment *apps.Deployment, rs *apps.ReplicaSet) bool {
	if rs == nil {
		return false
	}
	desiredString := rs.Annotations[DesiredReplicasAnnotation]
	desired, err := strconv.Atoi(desiredString)
	if err != nil {
		return false
	}
	return *(rs.Spec.Replicas) == *(deployment.Spec.Replicas) &&
		int32(desired) == *(deployment.Spec.Replicas) &&
		rs.Status.AvailableReplicas == *(deployment.Spec.Replicas)
}

// WaitForObservedDeployment polls for the deployment to be updated so that deployment.Status.ObservedGeneration >= desiredGeneration.
// Returns an error if polling times out.
func WaitForObservedDeployment(getDeploymentFunc func() (*apps.Deployment, error), desiredGeneration int64, interval, timeout time.Duration) error {
	// TODO: This should take clientset.Interface when all code is updated to use clientset. Keeping it this way allows the function to be used by callers who have client.Interface.
	return wait.PollImmediate(interval, timeout, func() (bool, error) {
		deployment, err := getDeploymentFunc()
		if err != nil {
			return false, err
		}
		return deployment.Status.ObservedGeneration >= desiredGeneration, nil
	})
}

// ResolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one
// step. For example:
//
// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1)
// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1)
// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1)
// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1)
func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired int32) (int32, int32, error) {
	surge, err := intstrutil.GetValueFromIntOrPercent(intstrutil.ValueOrDefault(maxSurge, intstrutil.FromInt(0)), int(desired), true)
	if err != nil {
		return 0, 0, err
	}
	unavailable, err := intstrutil.GetValueFromIntOrPercent(intstrutil.ValueOrDefault(maxUnavailable, intstrutil.FromInt(0)), int(desired), false)
	if err != nil {
		return 0, 0, err
	}

	if surge == 0 && unavailable == 0 {
		// Validation should never allow the user to explicitly use zero values for both maxSurge and
		// maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero.
		// If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the
		// theory that surge might not work due to quota.
		unavailable = 1
	}

	return int32(surge), int32(unavailable), nil
}

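// The asymmetric rounding above is the key detail: surge is rounded up while
// unavailability is rounded down, so percentage values always err on the side
// of keeping more pods running. An illustrative calculation: with desired = 10
// and both maxSurge and maxUnavailable set to "25%", the call resolves to
// surge = ceil(2.5) = 3 and unavailable = floor(2.5) = 2. With desired = 2 and
// both set to "1%", surge rounds up to 1 and unavailable rounds down to 0.
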
// HasProgressDeadline checks if the Deployment d is expected to surface the reason
// "ProgressDeadlineExceeded" when the Deployment progress takes longer than expected.
func HasProgressDeadline(d *apps.Deployment) bool {
	return d.Spec.ProgressDeadlineSeconds != nil && *d.Spec.ProgressDeadlineSeconds != math.MaxInt32
}

// HasRevisionHistoryLimit checks if the Deployment d is expected to keep a specified number of
// old replicaSets. These replicaSets are mainly kept for the purpose of rollback.
// The RevisionHistoryLimit can start from 0 (no retained replicaSets). When set to math.MaxInt32,
// the Deployment will keep all revisions.
func HasRevisionHistoryLimit(d *apps.Deployment) bool {
	return d.Spec.RevisionHistoryLimit != nil && *d.Spec.RevisionHistoryLimit != math.MaxInt32
}
19
vendor/k8s.io/kubernetes/pkg/controller/doc.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package controller contains code for controllers (like the replication
// controller).
package controller // import "k8s.io/kubernetes/pkg/controller"
18
vendor/k8s.io/kubernetes/pkg/controller/job/doc.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package job contains logic for watching and synchronizing jobs.
package job // import "k8s.io/kubernetes/pkg/controller/job"
871
vendor/k8s.io/kubernetes/pkg/controller/job/job_controller.go
generated
vendored
Normal file
@ -0,0 +1,871 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package job

import (
	"fmt"
	"math"
	"reflect"
	"sort"
	"sync"
	"time"

	batch "k8s.io/api/batch/v1"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	batchinformers "k8s.io/client-go/informers/batch/v1"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	batchv1listers "k8s.io/client-go/listers/batch/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/util/metrics"
	"k8s.io/utils/integer"

	"k8s.io/klog"
)

const statusUpdateRetries = 3

// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = batch.SchemeGroupVersion.WithKind("Job")

var (
	// DefaultJobBackOff is the default backoff period, exported for the e2e test
	DefaultJobBackOff = 10 * time.Second
	// MaxJobBackOff is the max backoff period, exported for the e2e test
	MaxJobBackOff = 360 * time.Second
)

type JobController struct {
	kubeClient clientset.Interface
	podControl controller.PodControlInterface

	// To allow injection of updateJobStatus for testing.
	updateHandler func(job *batch.Job) error
	syncHandler   func(jobKey string) (bool, error)
	// podStoreSynced returns true if the pod store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	podStoreSynced cache.InformerSynced
	// jobStoreSynced returns true if the job store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	jobStoreSynced cache.InformerSynced

	// A TTLCache of pod creates/deletes each rc expects to see
	expectations controller.ControllerExpectationsInterface

	// A store of jobs
	jobLister batchv1listers.JobLister

	// A store of pods, populated by the podController
	podStore corelisters.PodLister

	// Jobs that need to be updated
	queue workqueue.RateLimitingInterface

	recorder record.EventRecorder
}

func NewJobController(podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface) *JobController {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(klog.Infof)
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})

	if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("job_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter())
	}

	jm := &JobController{
		kubeClient: kubeClient,
		podControl: controller.RealPodControl{
			KubeClient: kubeClient,
			Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
		},
		expectations: controller.NewControllerExpectations(),
		queue:        workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job"),
		recorder:     eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
	}

	jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			jm.enqueueController(obj, true)
		},
		UpdateFunc: jm.updateJob,
		DeleteFunc: func(obj interface{}) {
			jm.enqueueController(obj, true)
		},
	})
	jm.jobLister = jobInformer.Lister()
	jm.jobStoreSynced = jobInformer.Informer().HasSynced

	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    jm.addPod,
		UpdateFunc: jm.updatePod,
		DeleteFunc: jm.deletePod,
	})
	jm.podStore = podInformer.Lister()
	jm.podStoreSynced = podInformer.Informer().HasSynced

	jm.updateHandler = jm.updateJobStatus
	jm.syncHandler = jm.syncJob

	return jm
}

// Run the main goroutine responsible for watching and syncing jobs.
func (jm *JobController) Run(workers int, stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer jm.queue.ShutDown()

	klog.Infof("Starting job controller")
	defer klog.Infof("Shutting down job controller")

	if !controller.WaitForCacheSync("job", stopCh, jm.podStoreSynced, jm.jobStoreSynced) {
		return
	}

	for i := 0; i < workers; i++ {
		go wait.Until(jm.worker, time.Second, stopCh)
	}

	<-stopCh
}

// getPodJobs returns a list of Jobs that potentially match a Pod.
func (jm *JobController) getPodJobs(pod *v1.Pod) []*batch.Job {
	jobs, err := jm.jobLister.GetPodJobs(pod)
	if err != nil {
		return nil
	}
	if len(jobs) > 1 {
		// ControllerRef will ensure we don't do anything crazy, but more than one
		// item in this list nevertheless constitutes user error.
		utilruntime.HandleError(fmt.Errorf("user error! more than one job is selecting pods with labels: %+v", pod.Labels))
	}
	ret := make([]*batch.Job, 0, len(jobs))
	for i := range jobs {
		ret = append(ret, &jobs[i])
	}
	return ret
}

// resolveControllerRef returns the controller referenced by a ControllerRef,
// or nil if the ControllerRef could not be resolved to a matching controller
// of the correct Kind.
func (jm *JobController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *batch.Job {
	// We can't look up by UID, so look up by Name and then verify UID.
	// Don't even try to look up by Name if it's the wrong Kind.
	if controllerRef.Kind != controllerKind.Kind {
		return nil
	}
	job, err := jm.jobLister.Jobs(namespace).Get(controllerRef.Name)
	if err != nil {
		return nil
	}
	if job.UID != controllerRef.UID {
		// The controller we found with this Name is not the same one that the
		// ControllerRef points to.
		return nil
	}
	return job
}

// When a pod is created, enqueue the controller that manages it and update its expectations.
func (jm *JobController) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)
	if pod.DeletionTimestamp != nil {
		// on a restart of the controller, it's possible a new pod shows up in a state that
		// is already pending deletion. Prevent the pod from being a creation observation.
		jm.deletePod(pod)
		return
	}

	// If it has a ControllerRef, that's all that matters.
	if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
		job := jm.resolveControllerRef(pod.Namespace, controllerRef)
		if job == nil {
			return
		}
		jobKey, err := controller.KeyFunc(job)
		if err != nil {
			return
		}
		jm.expectations.CreationObserved(jobKey)
		jm.enqueueController(job, true)
		return
	}

	// Otherwise, it's an orphan. Get a list of all matching controllers and sync
	// them to see if anyone wants to adopt it.
	// DO NOT observe creation because no controller should be waiting for an
	// orphan.
	for _, job := range jm.getPodJobs(pod) {
		jm.enqueueController(job, true)
	}
}

// When a pod is updated, figure out what job(s) manage it and wake them up.
// If the labels of the pod have changed we need to awaken both the old
// and new job. old and cur must be *v1.Pod types.
func (jm *JobController) updatePod(old, cur interface{}) {
	curPod := cur.(*v1.Pod)
	oldPod := old.(*v1.Pod)
	if curPod.ResourceVersion == oldPod.ResourceVersion {
		// Periodic resync will send update events for all known pods.
		// Two different versions of the same pod will always have different RVs.
		return
	}
	if curPod.DeletionTimestamp != nil {
		// when a pod is deleted gracefully its deletion timestamp is first modified to reflect a grace period,
		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
		// for modification of the deletion timestamp and expect a job to create more pods asap, not wait
		// until the kubelet actually deletes the pod.
		jm.deletePod(curPod)
		return
	}

	// the only time we want the backoff to kick in is when the pod failed
	immediate := curPod.Status.Phase != v1.PodFailed

	curControllerRef := metav1.GetControllerOf(curPod)
	oldControllerRef := metav1.GetControllerOf(oldPod)
	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
	if controllerRefChanged && oldControllerRef != nil {
		// The ControllerRef was changed. Sync the old controller, if any.
		if job := jm.resolveControllerRef(oldPod.Namespace, oldControllerRef); job != nil {
			jm.enqueueController(job, immediate)
		}
	}

	// If it has a ControllerRef, that's all that matters.
	if curControllerRef != nil {
		job := jm.resolveControllerRef(curPod.Namespace, curControllerRef)
		if job == nil {
			return
		}
		jm.enqueueController(job, immediate)
		return
	}

	// Otherwise, it's an orphan. If anything changed, sync matching controllers
	// to see if anyone wants to adopt it now.
	labelChanged := !reflect.DeepEqual(curPod.Labels, oldPod.Labels)
	if labelChanged || controllerRefChanged {
		for _, job := range jm.getPodJobs(curPod) {
			jm.enqueueController(job, immediate)
		}
	}
}

// When a pod is deleted, enqueue the job that manages the pod and update its expectations.
// obj could be a *v1.Pod, or a DeletionFinalStateUnknown marker item.
func (jm *JobController) deletePod(obj interface{}) {
	pod, ok := obj.(*v1.Pod)

	// When a delete is dropped, the relist will notice a pod in the store not
	// in the list, leading to the insertion of a tombstone object which contains
	// the deleted key/value. Note that this value might be stale. If the pod
	// changed labels the new job will not be woken up till the periodic resync.
	if !ok {
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
			return
		}
		pod, ok = tombstone.Obj.(*v1.Pod)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj))
			return
		}
	}

	controllerRef := metav1.GetControllerOf(pod)
	if controllerRef == nil {
		// No controller should care about orphans being deleted.
		return
	}
	job := jm.resolveControllerRef(pod.Namespace, controllerRef)
	if job == nil {
		return
	}
	jobKey, err := controller.KeyFunc(job)
	if err != nil {
		return
	}
	jm.expectations.DeletionObserved(jobKey)
	jm.enqueueController(job, true)
}

func (jm *JobController) updateJob(old, cur interface{}) {
	oldJob := old.(*batch.Job)
	curJob := cur.(*batch.Job)

	// never returns an error
	key, err := controller.KeyFunc(curJob)
	if err != nil {
		return
	}
	jm.enqueueController(curJob, true)
	// check if we need to add a new resync for ActiveDeadlineSeconds
	if curJob.Status.StartTime != nil {
		curADS := curJob.Spec.ActiveDeadlineSeconds
		if curADS == nil {
			return
		}
		oldADS := oldJob.Spec.ActiveDeadlineSeconds
		if oldADS == nil || *oldADS != *curADS {
			now := metav1.Now()
			start := curJob.Status.StartTime.Time
			passed := now.Time.Sub(start)
			total := time.Duration(*curADS) * time.Second
			// AddAfter will handle total < passed
			jm.queue.AddAfter(key, total-passed)
			klog.V(4).Infof("job ActiveDeadlineSeconds updated, will resync after %d seconds", total-passed)
		}
	}
}

// obj could be a *batch.Job, or a DeletionFinalStateUnknown marker item.
// immediate tells the controller to update the status right away, and should
// happen ONLY when there was a successful pod run.
func (jm *JobController) enqueueController(obj interface{}, immediate bool) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}

	backoff := time.Duration(0)
	if !immediate {
		backoff = getBackoff(jm.queue, key)
	}

	// TODO: Handle overlapping controllers better. Either disallow them at admission time or
	// deterministically avoid syncing controllers that fight over pods. Currently, we only
	// ensure that the same controller is synced for a given pod. When we periodically relist
	// all controllers there will still be some replica instability. One way to handle this is
	// by querying the store for all controllers that this rc overlaps, as well as all
	// controllers that overlap this rc, and sorting them.
	jm.queue.AddAfter(key, backoff)
}

// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (jm *JobController) worker() {
	for jm.processNextWorkItem() {
	}
}

func (jm *JobController) processNextWorkItem() bool {
	key, quit := jm.queue.Get()
	if quit {
		return false
	}
	defer jm.queue.Done(key)

	forget, err := jm.syncHandler(key.(string))
	if err == nil {
		if forget {
			jm.queue.Forget(key)
		}
		return true
	}

	utilruntime.HandleError(fmt.Errorf("Error syncing job: %v", err))
	jm.queue.AddRateLimited(key)

	return true
}

// getPodsForJob returns the set of pods that this Job should manage.
// It also reconciles ControllerRef by adopting/orphaning.
// Note that the returned Pods are pointers into the cache.
func (jm *JobController) getPodsForJob(j *batch.Job) ([]*v1.Pod, error) {
	selector, err := metav1.LabelSelectorAsSelector(j.Spec.Selector)
	if err != nil {
		return nil, fmt.Errorf("couldn't convert Job selector: %v", err)
	}
	// List all pods to include those that don't match the selector anymore
	// but have a ControllerRef pointing to this controller.
	pods, err := jm.podStore.Pods(j.Namespace).List(labels.Everything())
	if err != nil {
		return nil, err
	}
	// If any adoptions are attempted, we should first recheck for deletion
	// with an uncached quorum read sometime after listing Pods (see #42639).
	canAdoptFunc := controller.RecheckDeletionTimestamp(func() (metav1.Object, error) {
		fresh, err := jm.kubeClient.BatchV1().Jobs(j.Namespace).Get(j.Name, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
		if fresh.UID != j.UID {
			return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", j.Namespace, j.Name, fresh.UID, j.UID)
		}
		return fresh, nil
	})
	cm := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdoptFunc)
	return cm.ClaimPods(pods)
}

// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) (bool, error) {
	startTime := time.Now()
	defer func() {
		klog.V(4).Infof("Finished syncing job %q (%v)", key, time.Since(startTime))
	}()

	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return false, err
	}
	if len(ns) == 0 || len(name) == 0 {
		return false, fmt.Errorf("invalid job key %q: either namespace or name is missing", key)
	}
	sharedJob, err := jm.jobLister.Jobs(ns).Get(name)
	if err != nil {
		if errors.IsNotFound(err) {
			klog.V(4).Infof("Job has been deleted: %v", key)
			jm.expectations.DeleteExpectations(key)
			return true, nil
		}
		return false, err
	}
	job := *sharedJob

	// if the job was finished previously, we don't want to redo the termination
	if IsJobFinished(&job) {
		return true, nil
	}

	// retrieve the previous number of retries
	previousRetry := jm.queue.NumRequeues(key)

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobNeedsSync := jm.expectations.SatisfiedExpectations(key)

	pods, err := jm.getPodsForJob(&job)
	if err != nil {
		return false, err
	}

	activePods := controller.FilterActivePods(pods)
	active := int32(len(activePods))
	succeeded, failed := getStatus(pods)
	conditions := len(job.Status.Conditions)
	// job first start
	if job.Status.StartTime == nil {
		now := metav1.Now()
		job.Status.StartTime = &now
		// enqueue a sync to check if the job is past ActiveDeadlineSeconds
		if job.Spec.ActiveDeadlineSeconds != nil {
			klog.V(4).Infof("Job %s has ActiveDeadlineSeconds, will sync after %d seconds",
				key, *job.Spec.ActiveDeadlineSeconds)
			jm.queue.AddAfter(key, time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second)
		}
	}

	var manageJobErr error
	jobFailed := false
	var failureReason string
	var failureMessage string

	jobHaveNewFailure := failed > job.Status.Failed
	// new failures happen when status does not reflect the failures and active
	// is different than parallelism, otherwise the previous controller loop
	// failed updating status so even if we pick up failure it is not a new one
	exceedsBackoffLimit := jobHaveNewFailure && (active != *job.Spec.Parallelism) &&
		(int32(previousRetry)+1 > *job.Spec.BackoffLimit)

	if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
		// check if the number of pod restarts exceeds backoff (for restartPolicy OnFailure only)
		// OR if the number of failed pods increased since the last syncJob
		jobFailed = true
		failureReason = "BackoffLimitExceeded"
		failureMessage = "Job has reached the specified backoff limit"
	} else if pastActiveDeadline(&job) {
		jobFailed = true
		failureReason = "DeadlineExceeded"
		failureMessage = "Job was active longer than specified deadline"
	}

	if jobFailed {
		errCh := make(chan error, active)
		jm.deleteJobPods(&job, activePods, errCh)
		select {
		case manageJobErr = <-errCh:
			if manageJobErr != nil {
				break
			}
		default:
		}

		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, failureReason, failureMessage))
		jm.recorder.Event(&job, v1.EventTypeWarning, failureReason, failureMessage)
	} else {
		if jobNeedsSync && job.DeletionTimestamp == nil {
			active, manageJobErr = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		complete := false
		if job.Spec.Completions == nil {
			// This type of job is complete when any pod exits with success.
			// Each pod is capable of determining whether or not the entire Job is done. Subsequent pods are
			// not expected to fail, but if they do, the failure is ignored. Once any
			// pod succeeds, the controller waits for remaining pods to finish, and
			// then the job is complete.
			if succeeded > 0 && active == 0 {
				complete = true
			}
		} else {
			// Job specifies a number of completions. This type of job signals
			// success by having that number of successes. Since we do not
			// start more pods than there are remaining completions, there should
			// not be any remaining active pods once this count is reached.
			if completions >= *job.Spec.Completions {
				complete = true
				if active > 0 {
					jm.recorder.Event(&job, v1.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached")
				}
				if completions > *job.Spec.Completions {
					jm.recorder.Event(&job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
				}
			}
		}
		if complete {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
			now := metav1.Now()
			job.Status.CompletionTime = &now
		}
	}

	forget := false
	// Check if the number of succeeded pods increased since the last check. If yes, "forget" should be true.
	// This logic is linked to https://github.com/kubernetes/kubernetes/issues/56853, which aims to
	// improve the Job backoff policy when parallelism > 1 and a few Jobs failed but others succeed.
	// In this case, we should clear the backoff delay.
	if job.Status.Succeeded < succeeded {
		forget = true
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			return forget, err
		}

		if jobHaveNewFailure && !IsJobFinished(&job) {
			// returning an error will re-enqueue the Job after the backoff period
			return forget, fmt.Errorf("failed pod(s) detected for job key %q", key)
		}

		forget = true
	}

	return forget, manageJobErr
}

func (jm *JobController) deleteJobPods(job *batch.Job, pods []*v1.Pod, errCh chan<- error) {
	// TODO: below code should be replaced with pod termination resulting in
	// pod failures, rather than killing pods. Unfortunately no such solution
	// exists at the moment. There's an open discussion of the topic in
	// https://github.com/kubernetes/kubernetes/issues/14602 which might give
	// some sort of solution to the above problem.
	// kill remaining active pods
	wait := sync.WaitGroup{}
	nbPods := len(pods)
	wait.Add(nbPods)
	for i := int32(0); i < int32(nbPods); i++ {
		go func(ix int32) {
			defer wait.Done()
			if err := jm.podControl.DeletePod(job.Namespace, pods[ix].Name, job); err != nil {
				defer utilruntime.HandleError(err)
				klog.V(2).Infof("Failed to delete %v, job %q/%q deadline exceeded", pods[ix].Name, job.Namespace, job.Name)
				errCh <- err
			}
		}(i)
	}
	wait.Wait()
}

// pastBackoffLimitOnFailure checks if the sum of container restartCounts exceeds BackoffLimit.
// This method applies only to pods with restartPolicy == OnFailure.
func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
		return false
	}
	result := int32(0)
	for i := range pods {
		po := pods[i]
		if po.Status.Phase != v1.PodRunning {
			continue
		}
		for j := range po.Status.InitContainerStatuses {
			stat := po.Status.InitContainerStatuses[j]
			result += stat.RestartCount
		}
		for j := range po.Status.ContainerStatuses {
			stat := po.Status.ContainerStatuses[j]
			result += stat.RestartCount
		}
	}
	if *job.Spec.BackoffLimit == 0 {
		return result > 0
	}
	return result >= *job.Spec.BackoffLimit
}

// pastActiveDeadline checks if the job has the ActiveDeadlineSeconds field set and if it is exceeded.
func pastActiveDeadline(job *batch.Job) bool {
	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil {
		return false
	}
	now := metav1.Now()
	start := job.Status.StartTime.Time
	duration := now.Time.Sub(start)
	allowedDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds) * time.Second
	return duration >= allowedDuration
}

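// An illustrative reading of pastBackoffLimitOnFailure (hypothetical numbers):
// for a Job with restartPolicy OnFailure and backoffLimit 4, two running pods
// whose containers have restarted 3 and 1 times sum to 4 restarts, so the
// limit is reported as reached (result >= 4); with backoffLimit 0, any single
// restart already trips the check.
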
func newCondition(conditionType batch.JobConditionType, reason, message string) batch.JobCondition {
|
||||
return batch.JobCondition{
|
||||
Type: conditionType,
|
||||
Status: v1.ConditionTrue,
|
||||
LastProbeTime: metav1.Now(),
|
||||
LastTransitionTime: metav1.Now(),
|
||||
Reason: reason,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
|
||||
// getStatus returns no of succeeded and failed pods running a job
|
||||
func getStatus(pods []*v1.Pod) (succeeded, failed int32) {
|
||||
succeeded = int32(filterPods(pods, v1.PodSucceeded))
|
||||
failed = int32(filterPods(pods, v1.PodFailed))
|
||||
return
|
||||
}
|
||||
|
||||
// manageJob is the core method responsible for managing the number of running
|
||||
// pods according to what is specified in the job.Spec.
|
||||
// Does NOT modify <activePods>.
|
||||
func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *batch.Job) (int32, error) {
|
||||
var activeLock sync.Mutex
|
||||
active := int32(len(activePods))
|
||||
parallelism := *job.Spec.Parallelism
|
||||
jobKey, err := controller.KeyFunc(job)
|
||||
if err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("Couldn't get key for job %#v: %v", job, err))
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var errCh chan error
|
||||
if active > parallelism {
|
||||
diff := active - parallelism
|
||||
errCh = make(chan error, diff)
|
||||
jm.expectations.ExpectDeletions(jobKey, int(diff))
|
||||
klog.V(4).Infof("Too many pods running job %q, need %d, deleting %d", jobKey, parallelism, diff)
|
||||
// Sort the pods in the order such that not-ready < ready, unscheduled
|
||||
// < scheduled, and pending < running. This ensures that we delete pods
|
||||
// in the earlier stages whenever possible.
|
||||
sort.Sort(controller.ActivePods(activePods))
|
||||
|
||||
active -= diff
|
||||
wait := sync.WaitGroup{}
|
||||
wait.Add(int(diff))
|
||||
for i := int32(0); i < diff; i++ {
|
||||
go func(ix int32) {
|
||||
defer wait.Done()
|
||||
if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, job); err != nil {
|
||||
defer utilruntime.HandleError(err)
|
||||
					// Decrement the expected number of deletes because the informer won't observe this deletion
					klog.V(2).Infof("Failed to delete %v, decrementing expectations for job %q/%q", activePods[ix].Name, job.Namespace, job.Name)
					jm.expectations.DeletionObserved(jobKey)
					activeLock.Lock()
					active++
					activeLock.Unlock()
					errCh <- err
				}
			}(i)
		}
		wait.Wait()

	} else if active < parallelism {
		wantActive := int32(0)
		if job.Spec.Completions == nil {
			// Job does not specify a number of completions. Therefore, number active
			// should be equal to parallelism, unless the job has seen at least
			// one success, in which case leave whatever is running, running.
			if succeeded > 0 {
				wantActive = active
			} else {
				wantActive = parallelism
			}
		} else {
			// Job specifies a specific number of completions. Therefore, number
			// active should not ever exceed number of remaining completions.
			wantActive = *job.Spec.Completions - succeeded
			if wantActive > parallelism {
				wantActive = parallelism
			}
		}
		diff := wantActive - active
		if diff < 0 {
			utilruntime.HandleError(fmt.Errorf("More active than wanted: job %q, want %d, have %d", jobKey, wantActive, active))
			diff = 0
		}
		jm.expectations.ExpectCreations(jobKey, int(diff))
		errCh = make(chan error, diff)
		klog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, wantActive, diff)

		active += diff
		wait := sync.WaitGroup{}

		// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
		// and double with each successful iteration in a kind of "slow start".
		// This handles attempts to start large numbers of pods that would
		// likely all fail with the same error. For example a project with a
		// low quota that attempts to create a large number of pods will be
		// prevented from spamming the API service with the pod create requests
		// after one of its pods fails. Conveniently, this also prevents the
		// event spam that those failures would generate.
		for batchSize := int32(integer.IntMin(int(diff), controller.SlowStartInitialBatchSize)); diff > 0; batchSize = integer.Int32Min(2*batchSize, diff) {
			errorCount := len(errCh)
			wait.Add(int(batchSize))
			for i := int32(0); i < batchSize; i++ {
				go func() {
					defer wait.Done()
					err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
					if err != nil && errors.IsTimeout(err) {
						// Pod is created but its initialization has timed out.
						// If the initialization is successful eventually, the
						// controller will observe the creation via the informer.
						// If the initialization fails, or if the pod keeps
						// uninitialized for a long time, the informer will not
						// receive any update, and the controller will create a new
						// pod when the expectation expires.
						return
					}
					if err != nil {
						defer utilruntime.HandleError(err)
						// Decrement the expected number of creates because the informer won't observe this pod
						klog.V(2).Infof("Failed creation, decrementing expectations for job %q/%q", job.Namespace, job.Name)
						jm.expectations.CreationObserved(jobKey)
						activeLock.Lock()
						active--
						activeLock.Unlock()
						errCh <- err
					}
				}()
			}
			wait.Wait()
			// any skipped pods that we never attempted to start shouldn't be expected.
			skippedPods := diff - batchSize
			if errorCount < len(errCh) && skippedPods > 0 {
				klog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for job %q/%q", skippedPods, job.Namespace, job.Name)
				active -= skippedPods
				for i := int32(0); i < skippedPods; i++ {
					// Decrement the expected number of creates because the informer won't observe this pod
					jm.expectations.CreationObserved(jobKey)
				}
				// The skipped pods will be retried later. The next controller resync will
				// retry the slow start process.
				break
			}
			diff -= batchSize
		}
	}

	select {
	case err := <-errCh:
		// all errors have been reported before, we only need to inform the controller that there was an error and it should re-try this job once more next time.
		if err != nil {
			return active, err
		}
	default:
	}

	return active, nil
}

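// slowStartBatchSizes is an illustrative sketch, not part of the vendored
// source: it reproduces the batch-size progression of the slow-start loop in
// manageJob above, assuming controller.SlowStartInitialBatchSize is 1 (its
// value in this package). For diff = 20 it yields 1, 2, 4, 8, 5: sizes double
// after each fully successful batch, and the last batch is clamped to the
// remainder.
func slowStartBatchSizes(diff int32) []int32 {
	sizes := []int32{}
	for batchSize := int32(integer.IntMin(int(diff), controller.SlowStartInitialBatchSize)); diff > 0; batchSize = integer.Int32Min(2*batchSize, diff) {
		sizes = append(sizes, batchSize)
		diff -= batchSize
	}
	return sizes
}
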
func (jm *JobController) updateJobStatus(job *batch.Job) error {
	jobClient := jm.kubeClient.BatchV1().Jobs(job.Namespace)
	var err error
	for i := 0; i <= statusUpdateRetries; i++ {
		var newJob *batch.Job
		newJob, err = jobClient.Get(job.Name, metav1.GetOptions{})
		if err != nil {
			break
		}
		newJob.Status = job.Status
		if _, err = jobClient.UpdateStatus(newJob); err == nil {
			break
		}
	}

	return err
}

func getBackoff(queue workqueue.RateLimitingInterface, key interface{}) time.Duration {
	exp := queue.NumRequeues(key)

	if exp <= 0 {
		return time.Duration(0)
	}

	// The backoff is capped such that 'calculated' value never overflows.
	backoff := float64(DefaultJobBackOff.Nanoseconds()) * math.Pow(2, float64(exp-1))
	if backoff > math.MaxInt64 {
		return MaxJobBackOff
	}

	calculated := time.Duration(backoff)
	if calculated > MaxJobBackOff {
		return MaxJobBackOff
	}
	return calculated
}

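// exampleBackoffProgression is an illustrative sketch, not part of the
// vendored source: it tabulates getBackoff's doubling curve, assuming
// DefaultJobBackOff = 10s and MaxJobBackOff = 360s as declared earlier in
// this file, so successive requeues wait 10s, 20s, 40s, 80s, 160s, 320s and
// then stay capped at 360s.
func exampleBackoffProgression(requeues int) []time.Duration {
	out := make([]time.Duration, 0, requeues)
	for exp := 1; exp <= requeues; exp++ {
		d := time.Duration(float64(DefaultJobBackOff.Nanoseconds()) * math.Pow(2, float64(exp-1)))
		if d > MaxJobBackOff {
			d = MaxJobBackOff
		}
		out = append(out, d)
	}
	return out
}
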
// filterPods counts the pods in the given phase.
func filterPods(pods []*v1.Pod, phase v1.PodPhase) int {
	result := 0
	for i := range pods {
		if phase == pods[i].Status.Phase {
			result++
		}
	}
	return result
}
31
vendor/k8s.io/kubernetes/pkg/controller/job/utils.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package job

import (
	batch "k8s.io/api/batch/v1"
	"k8s.io/api/core/v1"
)

// IsJobFinished returns true if the Job has a Complete or Failed condition
// with status True.
func IsJobFinished(j *batch.Job) bool {
	for _, c := range j.Status.Conditions {
		if (c.Type == batch.JobComplete || c.Type == batch.JobFailed) && c.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}
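// Illustrative usage sketch (hypothetical caller, not part of the vendored
// source): controllers typically gate cleanup on this helper, e.g.
//
//	if IsJobFinished(job) {
//		// The Job has a Complete or Failed condition with status True;
//		// stop tracking it and release its expectations.
//	}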
92
vendor/k8s.io/kubernetes/pkg/controller/lookup_cache.go
generated
vendored
Normal file
@ -0,0 +1,92 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"hash/fnv"
	"sync"

	"github.com/golang/groupcache/lru"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	hashutil "k8s.io/kubernetes/pkg/util/hash"
)

type objectWithMeta interface {
	metav1.Object
}

// keyFunc returns the key of an object, which is used to look up its matching object in the cache.
// Since we match objects by namespace and labels/selector, two objects with the same namespace and
// labels will have the same key.
func keyFunc(obj objectWithMeta) uint64 {
	hash := fnv.New32a()
	hashutil.DeepHashObject(hash, &equivalenceLabelObj{
		namespace: obj.GetNamespace(),
		labels:    obj.GetLabels(),
	})
	return uint64(hash.Sum32())
}

type equivalenceLabelObj struct {
	namespace string
	labels    map[string]string
}

// MatchingCache saves label and selector matching relationships.
type MatchingCache struct {
	mutex sync.RWMutex
	cache *lru.Cache
}

// NewMatchingCache returns a new MatchingCache, which saves label and selector matching relationships.
func NewMatchingCache(maxCacheEntries int) *MatchingCache {
	return &MatchingCache{
		cache: lru.New(maxCacheEntries),
	}
}

// Add adds matching information to the cache.
func (c *MatchingCache) Add(labelObj objectWithMeta, selectorObj objectWithMeta) {
	key := keyFunc(labelObj)
	c.mutex.Lock()
	defer c.mutex.Unlock()
	c.cache.Add(key, selectorObj)
}

// GetMatchingObject looks up the matching object for a given object.
// Note: the cached information may be stale, since the controller may have been updated or
// deleted in the meantime; callers must verify the result against the live state before
// relying on it.
func (c *MatchingCache) GetMatchingObject(labelObj objectWithMeta) (controller interface{}, exists bool) {
	key := keyFunc(labelObj)
	// NOTE: we use Lock() instead of RLock() here because lru's Get() method also modifies
	// state (it updates the least-recently-used bookkeeping), so it cannot be called
	// concurrently.
	c.mutex.Lock()
	defer c.mutex.Unlock()
	return c.cache.Get(key)
}

// Update updates the cached matching information.
func (c *MatchingCache) Update(labelObj objectWithMeta, selectorObj objectWithMeta) {
	c.Add(labelObj, selectorObj)
}

// InvalidateAll invalidates the whole cache.
func (c *MatchingCache) InvalidateAll() {
	c.mutex.Lock()
	defer c.mutex.Unlock()
	c.cache = lru.New(c.cache.MaxEntries)
}
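// Illustrative usage sketch (hypothetical names, not part of the vendored
// source): a controller remembers which selector object matched an object's
// namespace+labels and revalidates on the next hit, since entries can go
// stale after the matching controller is updated or deleted.
//
//	cache := NewMatchingCache(4096)
//	cache.Add(pod, rs) // pod's namespace+labels hashed to a key, rs cached
//	if obj, ok := cache.GetMatchingObject(pod); ok {
//		// Revalidate obj against the live selector before trusting it.
//	}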
78
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/metrics.go
generated
vendored
Normal file
@ -0,0 +1,78 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodelifecycle

import (
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	nodeControllerSubsystem = "node_collector"
	zoneHealthStatisticKey  = "zone_health"
	zoneSizeKey             = "zone_size"
	zoneNoUnhealthyNodesKey = "unhealthy_nodes_in_zone"
	evictionsNumberKey      = "evictions_number"
)

var (
	zoneHealth = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: nodeControllerSubsystem,
			Name:      zoneHealthStatisticKey,
			Help:      "Gauge measuring percentage of healthy nodes per zone.",
		},
		[]string{"zone"},
	)
	zoneSize = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: nodeControllerSubsystem,
			Name:      zoneSizeKey,
			Help:      "Gauge measuring number of registered Nodes per zones.",
		},
		[]string{"zone"},
	)
	unhealthyNodes = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: nodeControllerSubsystem,
			Name:      zoneNoUnhealthyNodesKey,
			Help:      "Gauge measuring number of not Ready Nodes per zones.",
		},
		[]string{"zone"},
	)
	evictionsNumber = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: nodeControllerSubsystem,
			Name:      evictionsNumberKey,
			Help:      "Number of Node evictions that happened since current instance of NodeController started.",
		},
		[]string{"zone"},
	)
)

var registerMetrics sync.Once

// Register the metrics that are to be monitored.
func Register() {
	registerMetrics.Do(func() {
		prometheus.MustRegister(zoneHealth)
		prometheus.MustRegister(zoneSize)
		prometheus.MustRegister(unhealthyNodes)
		prometheus.MustRegister(evictionsNumber)
	})
}
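// Illustrative usage sketch (hypothetical zone and values, not part of the
// vendored source): Register is idempotent thanks to sync.Once; after calling
// it, the vectors are updated with one label value per zone.
//
//	Register()
//	zoneHealth.WithLabelValues("us-east-1a").Set(97.5)
//	unhealthyNodes.WithLabelValues("us-east-1a").Set(2)
//	evictionsNumber.WithLabelValues("us-east-1a").Inc()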
1284
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/node_lifecycle_controller.go
generated
vendored
Normal file
File diff suppressed because it is too large
309
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go
generated
vendored
Normal file
@ -0,0 +1,309 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"container/heap"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/util/flowcontrol"

	"k8s.io/klog"
)

const (
	// NodeHealthUpdateRetry controls the number of retries of writing
	// node health update.
	NodeHealthUpdateRetry = 5
	// NodeEvictionPeriod controls how often NodeController will try to
	// evict Pods from non-responsive Nodes.
	NodeEvictionPeriod = 100 * time.Millisecond
	// EvictionRateLimiterBurst is the burst value for all eviction rate
	// limiters.
	EvictionRateLimiterBurst = 1
)

// TimedValue is a value that should be processed at a designated time.
type TimedValue struct {
	Value string
	// UID could be anything that helps identify the value
	UID       interface{}
	AddedAt   time.Time
	ProcessAt time.Time
}

// now is used to test time
var now = time.Now

// TimedQueue is a priority heap where the lowest ProcessAt is at the front of the queue.
type TimedQueue []*TimedValue

// Len is the length of the queue.
func (h TimedQueue) Len() int { return len(h) }

// Less returns true if queue[i] < queue[j].
func (h TimedQueue) Less(i, j int) bool { return h[i].ProcessAt.Before(h[j].ProcessAt) }

// Swap swaps index i and j.
func (h TimedQueue) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push a new TimedValue on to the queue.
func (h *TimedQueue) Push(x interface{}) {
	*h = append(*h, x.(*TimedValue))
}

// Pop the lowest ProcessAt item.
func (h *TimedQueue) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}

// UniqueQueue is a FIFO queue which additionally guarantees that any
// element can be added only once until it is removed.
type UniqueQueue struct {
	lock  sync.Mutex
	queue TimedQueue
	set   sets.String
}

// Add a new value to the queue if it wasn't added before, or was
// explicitly removed by the Remove call. Returns true if new value
// was added.
func (q *UniqueQueue) Add(value TimedValue) bool {
	q.lock.Lock()
	defer q.lock.Unlock()

	if q.set.Has(value.Value) {
		return false
	}
	heap.Push(&q.queue, &value)
	q.set.Insert(value.Value)
	return true
}

// Replace replaces an existing value in the queue if it already
// exists, otherwise it does nothing. Returns true if the item was
// found.
func (q *UniqueQueue) Replace(value TimedValue) bool {
	q.lock.Lock()
	defer q.lock.Unlock()

	for i := range q.queue {
		if q.queue[i].Value != value.Value {
			continue
		}
		heap.Remove(&q.queue, i)
		heap.Push(&q.queue, &value)
		return true
	}
	return false
}

// RemoveFromQueue removes the value from the queue, but keeps it in the
// set, so it won't be added a second time. Returns true if something was
// removed.
func (q *UniqueQueue) RemoveFromQueue(value string) bool {
	q.lock.Lock()
	defer q.lock.Unlock()

	if !q.set.Has(value) {
		return false
	}
	for i, val := range q.queue {
		if val.Value == value {
			heap.Remove(&q.queue, i)
			return true
		}
	}
	return false
}

// Remove the value from the queue, so Get() call won't return it, and
// allow subsequent addition of the given value. If the value is not
// present does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
	q.lock.Lock()
	defer q.lock.Unlock()

	if !q.set.Has(value) {
		return false
	}
	q.set.Delete(value)
	for i, val := range q.queue {
		if val.Value == value {
			heap.Remove(&q.queue, i)
			return true
		}
	}
	return true
}

// Get returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (TimedValue, bool) {
	q.lock.Lock()
	defer q.lock.Unlock()
	if len(q.queue) == 0 {
		return TimedValue{}, false
	}
	result := heap.Pop(&q.queue).(*TimedValue)
	q.set.Delete(result.Value)
	return *result, true
}

// Head returns the oldest added value that wasn't returned yet
// without removing it.
func (q *UniqueQueue) Head() (TimedValue, bool) {
	q.lock.Lock()
	defer q.lock.Unlock()
	if len(q.queue) == 0 {
		return TimedValue{}, false
	}
	result := q.queue[0]
	return *result, true
}

// Clear removes all items from the queue and the duplication-preventing
// set.
func (q *UniqueQueue) Clear() {
	q.lock.Lock()
	defer q.lock.Unlock()
	if q.queue.Len() > 0 {
		q.queue = make(TimedQueue, 0)
	}
	if len(q.set) > 0 {
		q.set = sets.NewString()
	}
}

// RateLimitedTimedQueue is a unique item priority queue ordered by
// the expected next time of execution. It is also rate limited.
type RateLimitedTimedQueue struct {
	queue       UniqueQueue
	limiterLock sync.Mutex
	limiter     flowcontrol.RateLimiter
}

// NewRateLimitedTimedQueue creates new queue which will use given
// RateLimiter to oversee execution.
func NewRateLimitedTimedQueue(limiter flowcontrol.RateLimiter) *RateLimitedTimedQueue {
	return &RateLimitedTimedQueue{
		queue: UniqueQueue{
			queue: TimedQueue{},
			set:   sets.NewString(),
		},
		limiter: limiter,
	}
}

// ActionFunc takes a timed value and returns false if the item must
// be retried, with an optional time.Duration if some minimum wait
// interval should be used.
type ActionFunc func(TimedValue) (bool, time.Duration)

// Try processes the queue. It ends prematurely if RateLimiter forbids an
// action and leak is true. Otherwise, requeues the item to be
// processed. Each value is processed once if fn returns true,
// otherwise it is added back to the queue. The returned remaining is
// used to identify the minimum time to execute the next item in the
// queue. The same value is processed only once unless Remove is
// explicitly called on it (it's done by the cancelPodEviction
// function in NodeController when Node becomes Ready again).
// TODO: figure out a good way to do garbage collection for all Nodes that
// were removed from the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
	val, ok := q.queue.Head()
	q.limiterLock.Lock()
	defer q.limiterLock.Unlock()
	for ok {
		// rate limit the queue checking
		if !q.limiter.TryAccept() {
			klog.V(10).Infof("Try rate limited for value: %v", val)
			// Try again later
			break
		}

		now := now()
		if now.Before(val.ProcessAt) {
			break
		}

		if ok, wait := fn(val); !ok {
			val.ProcessAt = now.Add(wait + 1)
			q.queue.Replace(val)
		} else {
			q.queue.RemoveFromQueue(val.Value)
		}
		val, ok = q.queue.Head()
	}
}

// Add value to the queue to be processed. Won't add the same
// value (comparison by value) a second time if it was already added
// and not removed.
func (q *RateLimitedTimedQueue) Add(value string, uid interface{}) bool {
	now := now()
	return q.queue.Add(TimedValue{
		Value:     value,
		UID:       uid,
		AddedAt:   now,
		ProcessAt: now,
	})
}

// Remove Node from the Evictor. The Node won't be processed until
// added again.
func (q *RateLimitedTimedQueue) Remove(value string) bool {
	return q.queue.Remove(value)
}

// Clear removes all items from the queue.
func (q *RateLimitedTimedQueue) Clear() {
	q.queue.Clear()
}

// SwapLimiter safely swaps current limiter for this queue with the
// passed one if capacities or qps's differ.
func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) {
	q.limiterLock.Lock()
	defer q.limiterLock.Unlock()
	if q.limiter.QPS() == newQPS {
		return
	}
	var newLimiter flowcontrol.RateLimiter
	if newQPS <= 0 {
		newLimiter = flowcontrol.NewFakeNeverRateLimiter()
	} else {
		newLimiter = flowcontrol.NewTokenBucketRateLimiter(newQPS, EvictionRateLimiterBurst)

		// If we're currently waiting on limiter, we drain the new one - this is a good approach when Burst value is 1
		// TODO: figure out if we need to support higher Burst values and decide on the drain logic, should we keep:
		// - saturation (percentage of used tokens)
		// - number of used tokens
		// - number of available tokens
		// - something else
		if !q.limiter.TryAccept() {
			newLimiter.TryAccept()
		}
	}
	q.limiter.Stop()
	q.limiter = newLimiter
}
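// Illustrative usage sketch (hypothetical values, not part of the vendored
// source): an evictor enqueues node names and periodically drains whatever is
// due, here behind a 0.1 QPS token bucket.
//
//	evictor := NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(0.1, EvictionRateLimiterBurst))
//	evictor.Add("node-1", "some-uid")
//	evictor.Try(func(v TimedValue) (bool, time.Duration) {
//		// Evict pods from the node named v.Value here. Returning
//		// (false, wait) requeues the item at least `wait` later;
//		// returning (true, 0) retires it until Remove(v.Value) is called.
//		return true, 0
//	})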
500
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/taint_manager.go
generated
vendored
Normal file
@ -0,0 +1,500 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"fmt"
	"hash/fnv"
	"io"
	"sync"
	"time"

	"k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/kubernetes/pkg/apis/core/helper"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"

	"k8s.io/klog"
)

const (
	// TODO (k82cn): Figure out a reasonable number of workers/channels and propagate
	// the number of workers up, making it a parameter of the Run() function.

	// NodeUpdateChannelSize defines the size of channel for node update events.
	NodeUpdateChannelSize = 10
	// UpdateWorkerSize defines the size of workers for node update or/and pod update.
	UpdateWorkerSize     = 8
	podUpdateChannelSize = 1
	retries              = 5
)

type nodeUpdateItem struct {
	nodeName string
}

type podUpdateItem struct {
	podName      string
	podNamespace string
	nodeName     string
}

func hash(val string, max int) int {
	hasher := fnv.New32a()
	io.WriteString(hasher, val)
	return int(hasher.Sum32() % uint32(max))
}

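// Illustrative sketch (not part of the vendored source): hash gives a stable
// shard index, so every update for a given node name lands on the same worker
// channel and is processed in order. With UpdateWorkerSize = 8:
//
//	worker := hash("node-1", UpdateWorkerSize) // deterministic index in [0, 8)
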
// GetPodFunc returns the pod for the specified name/namespace, or a NotFound error if missing.
type GetPodFunc func(name, namespace string) (*v1.Pod, error)

// GetNodeFunc returns the node for the specified name, or a NotFound error if missing.
type GetNodeFunc func(name string) (*v1.Node, error)

// NoExecuteTaintManager listens to Taint/Toleration changes and is responsible for removing Pods
// from Nodes tainted with NoExecute Taints.
type NoExecuteTaintManager struct {
	client   clientset.Interface
	recorder record.EventRecorder
	getPod   GetPodFunc
	getNode  GetNodeFunc

	taintEvictionQueue *TimedWorkerQueue
	// keeps a map from nodeName to all noExecute taints on that Node
	taintedNodesLock sync.Mutex
	taintedNodes     map[string][]v1.Taint

	nodeUpdateChannels []chan nodeUpdateItem
	podUpdateChannels  []chan podUpdateItem

	nodeUpdateQueue workqueue.Interface
	podUpdateQueue  workqueue.Interface
}

func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(args *WorkArgs) error {
	return func(args *WorkArgs) error {
		ns := args.NamespacedName.Namespace
		name := args.NamespacedName.Name
		klog.V(0).Infof("NoExecuteTaintManager is deleting Pod: %v", args.NamespacedName.String())
		if emitEventFunc != nil {
			emitEventFunc(args.NamespacedName)
		}
		var err error
		for i := 0; i < retries; i++ {
			err = c.CoreV1().Pods(ns).Delete(name, &metav1.DeleteOptions{})
			if err == nil {
				break
			}
			time.Sleep(10 * time.Millisecond)
		}
		return err
	}
}

func getNoExecuteTaints(taints []v1.Taint) []v1.Taint {
	result := []v1.Taint{}
	for i := range taints {
		if taints[i].Effect == v1.TaintEffectNoExecute {
			result = append(result, taints[i])
		}
	}
	return result
}

func getPodsAssignedToNode(c clientset.Interface, nodeName string) ([]v1.Pod, error) {
	selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
	pods, err := c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
		FieldSelector: selector.String(),
		LabelSelector: labels.Everything().String(),
	})
	for i := 0; i < retries && err != nil; i++ {
		pods, err = c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
			FieldSelector: selector.String(),
			LabelSelector: labels.Everything().String(),
		})
		time.Sleep(100 * time.Millisecond)
	}
	if err != nil {
		return []v1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
	}
	return pods.Items, nil
}

// getMinTolerationTime returns the minimal toleration time from the given slice,
// or a negative duration if the toleration is infinite.
func getMinTolerationTime(tolerations []v1.Toleration) time.Duration {
	minTolerationTime := int64(-1)
	if len(tolerations) == 0 {
		return 0
	}

	for i := range tolerations {
		if tolerations[i].TolerationSeconds != nil {
			tolerationSeconds := *(tolerations[i].TolerationSeconds)
			if tolerationSeconds <= 0 {
				return 0
			} else if tolerationSeconds < minTolerationTime || minTolerationTime == -1 {
				minTolerationTime = tolerationSeconds
			}
		}
	}

	return time.Duration(minTolerationTime) * time.Second
}

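// Illustrative sketch (hypothetical values, not part of the vendored source):
// for TolerationSeconds values {300, 60}, getMinTolerationTime returns 60s;
// an explicit value <= 0 short-circuits to 0 (evict immediately); and if no
// toleration in a non-empty slice sets TolerationSeconds at all, the result
// is negative, which callers treat as "tolerated forever".
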
// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to
// communicate with the API server.
func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode GetNodeFunc) *NoExecuteTaintManager {
	eventBroadcaster := record.NewBroadcaster()
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
	eventBroadcaster.StartLogging(klog.Infof)
	if c != nil {
		klog.V(0).Infof("Sending events to api server.")
		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.CoreV1().Events("")})
	} else {
		klog.Fatalf("kubeClient is nil when starting NodeController")
	}

	tm := &NoExecuteTaintManager{
		client:       c,
		recorder:     recorder,
		getPod:       getPod,
		getNode:      getNode,
		taintedNodes: make(map[string][]v1.Taint),

		nodeUpdateQueue: workqueue.NewNamed("noexec_taint_node"),
		podUpdateQueue:  workqueue.NewNamed("noexec_taint_pod"),
	}
	tm.taintEvictionQueue = CreateWorkerQueue(deletePodHandler(c, tm.emitPodDeletionEvent))

	return tm
}

// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
	klog.V(0).Infof("Starting NoExecuteTaintManager")

	for i := 0; i < UpdateWorkerSize; i++ {
		tc.nodeUpdateChannels = append(tc.nodeUpdateChannels, make(chan nodeUpdateItem, NodeUpdateChannelSize))
		tc.podUpdateChannels = append(tc.podUpdateChannels, make(chan podUpdateItem, podUpdateChannelSize))
	}

	// Functions that are responsible for taking work items out of the workqueues and putting them
	// into channels.
	go func(stopCh <-chan struct{}) {
		for {
			item, shutdown := tc.nodeUpdateQueue.Get()
			if shutdown {
				break
			}
			nodeUpdate := item.(nodeUpdateItem)
			hash := hash(nodeUpdate.nodeName, UpdateWorkerSize)
			select {
			case <-stopCh:
				tc.nodeUpdateQueue.Done(item)
				return
			case tc.nodeUpdateChannels[hash] <- nodeUpdate:
				// tc.nodeUpdateQueue.Done is called by the nodeUpdateChannels worker
			}
		}
	}(stopCh)

	go func(stopCh <-chan struct{}) {
		for {
			item, shutdown := tc.podUpdateQueue.Get()
			if shutdown {
				break
			}
			podUpdate := item.(podUpdateItem)
			hash := hash(podUpdate.nodeName, UpdateWorkerSize)
			select {
			case <-stopCh:
				tc.podUpdateQueue.Done(item)
				return
			case tc.podUpdateChannels[hash] <- podUpdate:
				// tc.podUpdateQueue.Done is called by the podUpdateChannels worker
			}
		}
	}(stopCh)

	wg := sync.WaitGroup{}
	wg.Add(UpdateWorkerSize)
	for i := 0; i < UpdateWorkerSize; i++ {
		go tc.worker(i, wg.Done, stopCh)
	}
	wg.Wait()
}

func (tc *NoExecuteTaintManager) worker(worker int, done func(), stopCh <-chan struct{}) {
	defer done()

	// When processing events we want to prioritize Node updates over Pod updates,
	// as NodeUpdates that interest NoExecuteTaintManager should be handled as soon as possible -
	// we don't want user (or system) to wait until PodUpdate queue is drained before it can
	// start evicting Pods from tainted Nodes.
	for {
		select {
		case <-stopCh:
			return
		case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
			tc.handleNodeUpdate(nodeUpdate)
			tc.nodeUpdateQueue.Done(nodeUpdate)
		case podUpdate := <-tc.podUpdateChannels[worker]:
			// If we found a Pod update we need to empty Node queue first.
		priority:
			for {
				select {
				case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
					tc.handleNodeUpdate(nodeUpdate)
					tc.nodeUpdateQueue.Done(nodeUpdate)
				default:
					break priority
				}
			}
			// After Node queue is emptied we process podUpdate.
			tc.handlePodUpdate(podUpdate)
			tc.podUpdateQueue.Done(podUpdate)
		}
	}
}

// PodUpdated is used to notify NoExecuteTaintManager about Pod changes.
func (tc *NoExecuteTaintManager) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) {
	podName := ""
	podNamespace := ""
	nodeName := ""
	oldTolerations := []v1.Toleration{}
	if oldPod != nil {
		podName = oldPod.Name
		podNamespace = oldPod.Namespace
		nodeName = oldPod.Spec.NodeName
		oldTolerations = oldPod.Spec.Tolerations
	}
	newTolerations := []v1.Toleration{}
	if newPod != nil {
		podName = newPod.Name
		podNamespace = newPod.Namespace
		nodeName = newPod.Spec.NodeName
		newTolerations = newPod.Spec.Tolerations
	}

	if oldPod != nil && newPod != nil && helper.Semantic.DeepEqual(oldTolerations, newTolerations) && oldPod.Spec.NodeName == newPod.Spec.NodeName {
		return
	}
	updateItem := podUpdateItem{
		podName:      podName,
		podNamespace: podNamespace,
		nodeName:     nodeName,
	}

	tc.podUpdateQueue.Add(updateItem)
}

// NodeUpdated is used to notify NoExecuteTaintManager about Node changes.
func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) {
	nodeName := ""
	oldTaints := []v1.Taint{}
	if oldNode != nil {
		nodeName = oldNode.Name
		oldTaints = getNoExecuteTaints(oldNode.Spec.Taints)
	}

	newTaints := []v1.Taint{}
	if newNode != nil {
		nodeName = newNode.Name
		newTaints = getNoExecuteTaints(newNode.Spec.Taints)
	}

	if oldNode != nil && newNode != nil && helper.Semantic.DeepEqual(oldTaints, newTaints) {
		return
	}
	updateItem := nodeUpdateItem{
		nodeName: nodeName,
	}

	tc.nodeUpdateQueue.Add(updateItem)
}

func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
	if tc.taintEvictionQueue.CancelWork(nsName.String()) {
		tc.emitCancelPodDeletionEvent(nsName)
	}
}

func (tc *NoExecuteTaintManager) processPodOnNode(
	podNamespacedName types.NamespacedName,
	nodeName string,
	tolerations []v1.Toleration,
	taints []v1.Taint,
	now time.Time,
) {
	if len(taints) == 0 {
		tc.cancelWorkWithEvent(podNamespacedName)
	}
	allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
	if !allTolerated {
		klog.V(2).Infof("Not all taints are tolerated after update for Pod %v on %v", podNamespacedName.String(), nodeName)
		// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
		tc.cancelWorkWithEvent(podNamespacedName)
		tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
		return
	}
	minTolerationTime := getMinTolerationTime(usedTolerations)
	// getMinTolerationTime returns negative value to denote infinite toleration.
	if minTolerationTime < 0 {
		klog.V(4).Infof("New tolerations for %v tolerate forever. Scheduled deletion won't be cancelled if already scheduled.", podNamespacedName.String())
		return
	}

	startTime := now
	triggerTime := startTime.Add(minTolerationTime)
	scheduledEviction := tc.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String())
	if scheduledEviction != nil {
		startTime = scheduledEviction.CreatedAt
		if startTime.Add(minTolerationTime).Before(triggerTime) {
			return
		}
		tc.cancelWorkWithEvent(podNamespacedName)
	}
	tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
}

func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
	pod, err := tc.getPod(podUpdate.podName, podUpdate.podNamespace)
	if err != nil {
		if apierrors.IsNotFound(err) {
			// Delete
			podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
			klog.V(4).Infof("Noticed pod deletion: %#v", podNamespacedName)
			tc.cancelWorkWithEvent(podNamespacedName)
			return
		}
		utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
		return
	}

	// We key the workqueue and shard workers by nodeName. If we don't match the current state we should not be the one processing the current object.
	if pod.Spec.NodeName != podUpdate.nodeName {
		return
	}

	// Create or Update
	podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
	klog.V(4).Infof("Noticed pod update: %#v", podNamespacedName)
	nodeName := pod.Spec.NodeName
	if nodeName == "" {
		return
	}
	taints, ok := func() ([]v1.Taint, bool) {
		tc.taintedNodesLock.Lock()
		defer tc.taintedNodesLock.Unlock()
		taints, ok := tc.taintedNodes[nodeName]
		return taints, ok
	}()
	// It's possible that Node was deleted, or Taints were removed before, which triggered
	// eviction cancelling if it was needed.
	if !ok {
		return
	}
	tc.processPodOnNode(podNamespacedName, nodeName, pod.Spec.Tolerations, taints, time.Now())
}

func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
	node, err := tc.getNode(nodeUpdate.nodeName)
	if err != nil {
		if apierrors.IsNotFound(err) {
			// Delete
			klog.V(4).Infof("Noticed node deletion: %#v", nodeUpdate.nodeName)
			tc.taintedNodesLock.Lock()
			defer tc.taintedNodesLock.Unlock()
			delete(tc.taintedNodes, nodeUpdate.nodeName)
			return
		}
		utilruntime.HandleError(fmt.Errorf("cannot get node %s: %v", nodeUpdate.nodeName, err))
		return
	}

	// Create or Update
	klog.V(4).Infof("Noticed node update: %#v", nodeUpdate)
	taints := getNoExecuteTaints(node.Spec.Taints)
	func() {
		tc.taintedNodesLock.Lock()
		defer tc.taintedNodesLock.Unlock()
		klog.V(4).Infof("Updating known taints on node %v: %v", node.Name, taints)
		if len(taints) == 0 {
			delete(tc.taintedNodes, node.Name)
		} else {
			tc.taintedNodes[node.Name] = taints
		}
	}()
	pods, err := getPodsAssignedToNode(tc.client, node.Name)
	if err != nil {
		klog.Errorf(err.Error())
		return
	}
	if len(pods) == 0 {
		return
	}
	// Short circuit, to make this controller a bit faster.
	if len(taints) == 0 {
		klog.V(4).Infof("All taints were removed from the Node %v. Cancelling all evictions...", node.Name)
		for i := range pods {
			tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
		}
		return
	}

	now := time.Now()
	for i := range pods {
		pod := &pods[i]
		podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
		tc.processPodOnNode(podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)
	}
}

func (tc *NoExecuteTaintManager) emitPodDeletionEvent(nsName types.NamespacedName) {
	if tc.recorder == nil {
		return
	}
	ref := &v1.ObjectReference{
		Kind:      "Pod",
		Name:      nsName.Name,
		Namespace: nsName.Namespace,
	}
	tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Marking for deletion Pod %s", nsName.String())
}

func (tc *NoExecuteTaintManager) emitCancelPodDeletionEvent(nsName types.NamespacedName) {
	if tc.recorder == nil {
		return
	}
	ref := &v1.ObjectReference{
		Kind:      "Pod",
		Name:      nsName.Name,
		Namespace: nsName.Namespace,
	}
	tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Cancelling deletion of Pod %s", nsName.String())
}
145
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/timed_workers.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/types"

	"k8s.io/klog"
)

// WorkArgs keeps arguments that will be passed to the function executed by the worker.
type WorkArgs struct {
	NamespacedName types.NamespacedName
}

// KeyFromWorkArgs creates a key for the given `WorkArgs`.
func (w *WorkArgs) KeyFromWorkArgs() string {
	return w.NamespacedName.String()
}

// NewWorkArgs is a helper function to create new `WorkArgs`.
func NewWorkArgs(name, namespace string) *WorkArgs {
	return &WorkArgs{types.NamespacedName{Namespace: namespace, Name: name}}
}

// TimedWorker is responsible for executing a function no earlier than at FireAt time.
type TimedWorker struct {
	WorkItem  *WorkArgs
	CreatedAt time.Time
	FireAt    time.Time
	Timer     *time.Timer
}

// CreateWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func CreateWorker(args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(args *WorkArgs) error) *TimedWorker {
	delay := fireAt.Sub(createdAt)
	if delay <= 0 {
		go f(args)
		return nil
	}
	timer := time.AfterFunc(delay, func() { f(args) })
	return &TimedWorker{
		WorkItem:  args,
		CreatedAt: createdAt,
		FireAt:    fireAt,
		Timer:     timer,
	}
}

// Cancel cancels the execution of function by the `TimedWorker`.
func (w *TimedWorker) Cancel() {
	if w != nil {
		w.Timer.Stop()
	}
}

// TimedWorkerQueue keeps a set of TimedWorkers that are still waiting for execution.
type TimedWorkerQueue struct {
	sync.Mutex
	// map of workers keyed by string returned by 'KeyFromWorkArgs' from the given worker.
	workers  map[string]*TimedWorker
	workFunc func(args *WorkArgs) error
}

// CreateWorkerQueue creates a new TimedWorkerQueue for workers that will execute
// given function `f`.
func CreateWorkerQueue(f func(args *WorkArgs) error) *TimedWorkerQueue {
	return &TimedWorkerQueue{
		workers:  make(map[string]*TimedWorker),
		workFunc: f,
	}
}

func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(args *WorkArgs) error {
	return func(args *WorkArgs) error {
		err := q.workFunc(args)
		q.Lock()
		defer q.Unlock()
		if err == nil {
			// To avoid duplicated calls we keep the key in the queue, to prevent
			// subsequent additions.
			q.workers[key] = nil
		} else {
			delete(q.workers, key)
		}
		return err
	}
}

// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(args *WorkArgs, createdAt time.Time, fireAt time.Time) {
	key := args.KeyFromWorkArgs()
	klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)

	q.Lock()
	defer q.Unlock()
	if _, exists := q.workers[key]; exists {
		klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
		return
	}
	worker := CreateWorker(args, createdAt, fireAt, q.getWrappedWorkerFunc(key))
	q.workers[key] = worker
}

// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
func (q *TimedWorkerQueue) CancelWork(key string) bool {
	q.Lock()
	defer q.Unlock()
	worker, found := q.workers[key]
	result := false
	if found {
		klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
		if worker != nil {
			result = true
			worker.Cancel()
		}
		delete(q.workers, key)
	}
	return result
}

// GetWorkerUnsafe returns a TimedWorker corresponding to the given key.
// Unsafe method - workers have attached goroutines which can fire after this function is called.
func (q *TimedWorkerQueue) GetWorkerUnsafe(key string) *TimedWorker {
	q.Lock()
	defer q.Unlock()
	return q.workers[key]
}
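// Illustrative usage sketch (hypothetical names, not part of the vendored
// source): schedule a pod deletion five minutes out, then cancel it if the
// triggering taint disappears first.
//
//	q := CreateWorkerQueue(func(args *WorkArgs) error {
//		// Delete the pod identified by args.NamespacedName here.
//		return nil
//	})
//	created := time.Now()
//	q.AddWork(NewWorkArgs("mypod", "myns"), created, created.Add(5*time.Minute))
//	cancelled := q.CancelWork(NewWorkArgs("mypod", "myns").KeyFromWorkArgs())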
19
vendor/k8s.io/kubernetes/pkg/controller/service/doc.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package service contains code for syncing cloud load balancers
// with the service registry.
package service // import "k8s.io/kubernetes/pkg/controller/service"
769
vendor/k8s.io/kubernetes/pkg/controller/service/service_controller.go
generated
vendored
Normal file
@ -0,0 +1,769 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"context"
	"fmt"
	"sync"
	"time"

	"reflect"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/klog"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/controller"
	kubefeatures "k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/util/metrics"
)

const (
	// Interval of synchronizing service status from apiserver
	serviceSyncPeriod = 30 * time.Second
	// Interval of synchronizing node status from apiserver
	nodeSyncPeriod = 100 * time.Second

	// How long to wait before retrying the processing of a service change.
	// If this changes, the sleep in hack/jenkins/e2e.sh before downing a cluster
	// should be changed appropriately.
	minRetryDelay = 5 * time.Second
	maxRetryDelay = 300 * time.Second

	clientRetryCount    = 5
	clientRetryInterval = 5 * time.Second

	// LabelNodeRoleMaster specifies that a node is a master.
	// It's copied over to kubeadm until it's merged in core: https://github.com/kubernetes/kubernetes/pull/39112
	LabelNodeRoleMaster = "node-role.kubernetes.io/master"

	// LabelNodeRoleExcludeBalancer specifies that the node should be
	// excluded from load balancers created by a cloud provider.
	LabelNodeRoleExcludeBalancer = "alpha.service-controller.kubernetes.io/exclude-balancer"
)

type cachedService struct {
	// The cached state of the service
	state *v1.Service
}

type serviceCache struct {
	mu         sync.Mutex // protects serviceMap
	serviceMap map[string]*cachedService
}

// ServiceController keeps cloud provider service resources
// (like load balancers) in sync with the registry.
type ServiceController struct {
	cloud               cloudprovider.Interface
	knownHosts          []*v1.Node
	servicesToUpdate    []*v1.Service
	kubeClient          clientset.Interface
	clusterName         string
	balancer            cloudprovider.LoadBalancer
	cache               *serviceCache
	serviceLister       corelisters.ServiceLister
	serviceListerSynced cache.InformerSynced
	eventBroadcaster    record.EventBroadcaster
	eventRecorder       record.EventRecorder
	nodeLister          corelisters.NodeLister
	nodeListerSynced    cache.InformerSynced
	// services that need to be synced
	queue workqueue.RateLimitingInterface
}

// New returns a new service controller to keep cloud provider service resources
// (like load balancers) in sync with the registry.
func New(
	cloud cloudprovider.Interface,
	kubeClient clientset.Interface,
	serviceInformer coreinformers.ServiceInformer,
	nodeInformer coreinformers.NodeInformer,
	clusterName string,
) (*ServiceController, error) {
	broadcaster := record.NewBroadcaster()
	broadcaster.StartLogging(klog.Infof)
	broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "service-controller"})

	if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
		if err := metrics.RegisterMetricAndTrackRateLimiterUsage("service_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
			return nil, err
		}
	}

	s := &ServiceController{
		cloud:            cloud,
		knownHosts:       []*v1.Node{},
		kubeClient:       kubeClient,
		clusterName:      clusterName,
		cache:            &serviceCache{serviceMap: make(map[string]*cachedService)},
		eventBroadcaster: broadcaster,
		eventRecorder:    recorder,
		nodeLister:       nodeInformer.Lister(),
		nodeListerSynced: nodeInformer.Informer().HasSynced,
		queue:            workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(minRetryDelay, maxRetryDelay), "service"),
	}

	serviceInformer.Informer().AddEventHandlerWithResyncPeriod(
		cache.ResourceEventHandlerFuncs{
			AddFunc: s.enqueueService,
			UpdateFunc: func(old, cur interface{}) {
				oldSvc, ok1 := old.(*v1.Service)
				curSvc, ok2 := cur.(*v1.Service)
				if ok1 && ok2 && s.needsUpdate(oldSvc, curSvc) {
					s.enqueueService(cur)
				}
			},
			DeleteFunc: s.enqueueService,
		},
		serviceSyncPeriod,
	)
	s.serviceLister = serviceInformer.Lister()
	s.serviceListerSynced = serviceInformer.Informer().HasSynced

	if err := s.init(); err != nil {
		return nil, err
	}
	return s, nil
}

// obj could be an *v1.Service, or a DeletionFinalStateUnknown marker item.
func (s *ServiceController) enqueueService(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		klog.Errorf("Couldn't get key for object %#v: %v", obj, err)
		return
	}
	s.queue.Add(key)
}

// Run starts a background goroutine that watches for changes to services that
// have (or had) LoadBalancers=true and ensures that they have
// load balancers created and deleted appropriately.
// serviceSyncPeriod controls how often we check the cluster's services to
// ensure that the correct load balancers exist.
// nodeSyncPeriod controls how often we check the cluster's nodes to determine
// if load balancers need to be updated to point to a new set.
//
// It's an error to call Run() more than once for a given ServiceController
// object.
func (s *ServiceController) Run(stopCh <-chan struct{}, workers int) {
	defer runtime.HandleCrash()
	defer s.queue.ShutDown()

	klog.Info("Starting service controller")
	defer klog.Info("Shutting down service controller")

	if !controller.WaitForCacheSync("service", stopCh, s.serviceListerSynced, s.nodeListerSynced) {
		return
	}

	for i := 0; i < workers; i++ {
		go wait.Until(s.worker, time.Second, stopCh)
	}

	go wait.Until(s.nodeSyncLoop, nodeSyncPeriod, stopCh)

	<-stopCh
}

// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (s *ServiceController) worker() {
	for s.processNextWorkItem() {
	}
}

func (s *ServiceController) processNextWorkItem() bool {
	key, quit := s.queue.Get()
	if quit {
		return false
	}
	defer s.queue.Done(key)

	err := s.syncService(key.(string))
	if err == nil {
		s.queue.Forget(key)
		return true
	}

	runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
	s.queue.AddRateLimited(key)
	return true
}

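// A note on the pattern above (not part of the vendored source): Get blocks
// until an item is available and guarantees that the same key is never handed
// to two workers at once, so syncService never runs concurrently for one
// service. Done releases the key, Forget clears its failure count on success,
// and AddRateLimited requeues it with exponential backoff, here 5s doubling
// up to 300s per minRetryDelay and maxRetryDelay above.
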
func (s *ServiceController) init() error {
|
||||
if s.cloud == nil {
|
||||
return fmt.Errorf("WARNING: no cloud provider provided, services of type LoadBalancer will fail")
|
||||
}
|
||||
|
||||
balancer, ok := s.cloud.LoadBalancer()
|
||||
if !ok {
|
||||
return fmt.Errorf("the cloud provider does not support external load balancers")
|
||||
}
|
||||
s.balancer = balancer
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// processServiceUpdate operates loadbalancers for the incoming service accordingly.
|
||||
// Returns an error if processing the service update failed.
|
||||
func (s *ServiceController) processServiceUpdate(cachedService *cachedService, service *v1.Service, key string) error {
|
||||
if cachedService.state != nil {
|
||||
if cachedService.state.UID != service.UID {
|
||||
err := s.processLoadBalancerDelete(cachedService, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
// cache the service, we need the info for service deletion
|
||||
cachedService.state = service
|
||||
err := s.createLoadBalancerIfNeeded(key, service)
|
||||
if err != nil {
|
||||
eventType := "CreatingLoadBalancerFailed"
|
||||
message := "Error creating load balancer (will retry): "
|
||||
if !wantsLoadBalancer(service) {
|
||||
eventType = "CleanupLoadBalancerFailed"
|
||||
message = "Error cleaning up load balancer (will retry): "
|
||||
}
|
||||
message += err.Error()
|
||||
s.eventRecorder.Event(service, v1.EventTypeWarning, eventType, message)
|
||||
return err
|
||||
}
|
||||
// Always update the cache upon success.
|
||||
// NOTE: Since we update the cached service if and only if we successfully
|
||||
// processed it, a cached service being nil implies that it hasn't yet
|
||||
// been successfully processed.
|
||||
s.cache.set(key, cachedService)
|
||||
|
||||
return nil
|
||||
}

// createLoadBalancerIfNeeded ensures that the service's status is in sync with the
// load balancer, i.e. it creates a load balancer for the service if one is requested,
// and deletes the load balancer if the service no longer wants one. Returns whatever
// error occurred.
func (s *ServiceController) createLoadBalancerIfNeeded(key string, service *v1.Service) error {
	// Note: It is safe to just call EnsureLoadBalancer. But, on some clouds that requires a delete & create,
	// which may involve service interruption. Also, we would like user-friendly events.

	// Save the state so we can avoid a write if it doesn't change
	previousState := v1helper.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
	var newState *v1.LoadBalancerStatus
	var err error

	if !wantsLoadBalancer(service) {
		_, exists, err := s.balancer.GetLoadBalancer(context.TODO(), s.clusterName, service)
		if err != nil {
			return fmt.Errorf("error getting LB for service %s: %v", key, err)
		}
		if exists {
			klog.Infof("Deleting existing load balancer for service %s that no longer needs a load balancer.", key)
			s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletingLoadBalancer", "Deleting load balancer")
			if err := s.balancer.EnsureLoadBalancerDeleted(context.TODO(), s.clusterName, service); err != nil {
				return err
			}
			s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted load balancer")
		}

		newState = &v1.LoadBalancerStatus{}
	} else {
		klog.V(2).Infof("Ensuring LB for service %s", key)

		// TODO: We could do a dry-run here if wanted to avoid the spurious cloud-calls & events when we restart

		s.eventRecorder.Event(service, v1.EventTypeNormal, "EnsuringLoadBalancer", "Ensuring load balancer")
		newState, err = s.ensureLoadBalancer(service)
		if err != nil {
			return fmt.Errorf("failed to ensure load balancer for service %s: %v", key, err)
		}
		s.eventRecorder.Event(service, v1.EventTypeNormal, "EnsuredLoadBalancer", "Ensured load balancer")
	}

	// Write the state if changed
	// TODO: Be careful here ... what if there were other changes to the service?
	if !v1helper.LoadBalancerStatusEqual(previousState, newState) {
		// Make a copy so we don't mutate the shared informer cache
		service = service.DeepCopy()

		// Update the status on the copy
		service.Status.LoadBalancer = *newState

		if err := s.persistUpdate(service); err != nil {
			// TODO: This logic needs to be revisited. We might want to retry on all the errors, not just conflicts.
			if errors.IsConflict(err) {
				return fmt.Errorf("not persisting update to service '%s/%s' that has been changed since we received it: %v", service.Namespace, service.Name, err)
			}
			runtime.HandleError(fmt.Errorf("failed to persist service %q updated status to apiserver, even after retries. Giving up: %v", key, err))
			return nil
		}
	} else {
		klog.V(2).Infof("Not persisting unchanged LoadBalancerStatus for service %s to registry.", key)
	}

	return nil
}

func (s *ServiceController) persistUpdate(service *v1.Service) error {
	var err error
	for i := 0; i < clientRetryCount; i++ {
		_, err = s.kubeClient.CoreV1().Services(service.Namespace).UpdateStatus(service)
		if err == nil {
			return nil
		}
		// If the object no longer exists, we don't want to recreate it. Just bail
		// out so that we can process the delete, which we should soon be receiving
		// if we haven't already.
		if errors.IsNotFound(err) {
			klog.Infof("Not persisting update to service '%s/%s' that no longer exists: %v",
				service.Namespace, service.Name, err)
			return nil
		}
		// TODO: Try to resolve the conflict if the change was unrelated to load
		// balancer status. For now, just pass it up the stack.
		if errors.IsConflict(err) {
			return err
		}
		klog.Warningf("Failed to persist updated LoadBalancerStatus to service '%s/%s' after creating its load balancer: %v",
			service.Namespace, service.Name, err)
		time.Sleep(clientRetryInterval)
	}
	return err
}

func (s *ServiceController) ensureLoadBalancer(service *v1.Service) (*v1.LoadBalancerStatus, error) {
	nodes, err := s.nodeLister.ListWithPredicate(getNodeConditionPredicate())
	if err != nil {
		return nil, err
	}

	// If there are no available nodes for LoadBalancer service, make an EventTypeWarning event for it.
	if len(nodes) == 0 {
		s.eventRecorder.Eventf(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer service %s/%s", service.Namespace, service.Name)
	}

	// - Only one protocol supported per service
	// - Not all cloud providers support all protocols and the next step is expected to return
	//   an error for unsupported protocols
	return s.balancer.EnsureLoadBalancer(context.TODO(), s.clusterName, service, nodes)
}

// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func (s *serviceCache) ListKeys() []string {
	s.mu.Lock()
	defer s.mu.Unlock()
	keys := make([]string, 0, len(s.serviceMap))
	for k := range s.serviceMap {
		keys = append(keys, k)
	}
	return keys
}

// GetByKey returns the value stored in the serviceMap under the given key
func (s *serviceCache) GetByKey(key string) (interface{}, bool, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if v, ok := s.serviceMap[key]; ok {
		return v, true, nil
	}
	return nil, false, nil
}

// allServices returns a copy of the current list of services held in the
// cache.
func (s *serviceCache) allServices() []*v1.Service {
	s.mu.Lock()
	defer s.mu.Unlock()
	services := make([]*v1.Service, 0, len(s.serviceMap))
	for _, v := range s.serviceMap {
		services = append(services, v.state)
	}
	return services
}

func (s *serviceCache) get(serviceName string) (*cachedService, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	service, ok := s.serviceMap[serviceName]
	return service, ok
}

func (s *serviceCache) getOrCreate(serviceName string) *cachedService {
	s.mu.Lock()
	defer s.mu.Unlock()
	service, ok := s.serviceMap[serviceName]
	if !ok {
		service = &cachedService{}
		s.serviceMap[serviceName] = service
	}
	return service
}

func (s *serviceCache) set(serviceName string, service *cachedService) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.serviceMap[serviceName] = service
}

func (s *serviceCache) delete(serviceName string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.serviceMap, serviceName)
}

func (s *ServiceController) needsUpdate(oldService *v1.Service, newService *v1.Service) bool {
	if !wantsLoadBalancer(oldService) && !wantsLoadBalancer(newService) {
		return false
	}
	if wantsLoadBalancer(oldService) != wantsLoadBalancer(newService) {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "Type", "%v -> %v",
			oldService.Spec.Type, newService.Spec.Type)
		return true
	}

	if wantsLoadBalancer(newService) && !reflect.DeepEqual(oldService.Spec.LoadBalancerSourceRanges, newService.Spec.LoadBalancerSourceRanges) {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "LoadBalancerSourceRanges", "%v -> %v",
			oldService.Spec.LoadBalancerSourceRanges, newService.Spec.LoadBalancerSourceRanges)
		return true
	}

	if !portsEqualForLB(oldService, newService) || oldService.Spec.SessionAffinity != newService.Spec.SessionAffinity {
		return true
	}
	if !loadBalancerIPsAreEqual(oldService, newService) {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "LoadbalancerIP", "%v -> %v",
			oldService.Spec.LoadBalancerIP, newService.Spec.LoadBalancerIP)
		return true
	}
	if len(oldService.Spec.ExternalIPs) != len(newService.Spec.ExternalIPs) {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "ExternalIP", "Count: %v -> %v",
			len(oldService.Spec.ExternalIPs), len(newService.Spec.ExternalIPs))
		return true
	}
	for i := range oldService.Spec.ExternalIPs {
		if oldService.Spec.ExternalIPs[i] != newService.Spec.ExternalIPs[i] {
			s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "ExternalIP", "Added: %v",
				newService.Spec.ExternalIPs[i])
			return true
		}
	}
	if !reflect.DeepEqual(oldService.Annotations, newService.Annotations) {
		return true
	}
	if oldService.UID != newService.UID {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "UID", "%v -> %v",
			oldService.UID, newService.UID)
		return true
	}
	if oldService.Spec.ExternalTrafficPolicy != newService.Spec.ExternalTrafficPolicy {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "ExternalTrafficPolicy", "%v -> %v",
			oldService.Spec.ExternalTrafficPolicy, newService.Spec.ExternalTrafficPolicy)
		return true
	}
	if oldService.Spec.HealthCheckNodePort != newService.Spec.HealthCheckNodePort {
		s.eventRecorder.Eventf(newService, v1.EventTypeNormal, "HealthCheckNodePort", "%v -> %v",
			oldService.Spec.HealthCheckNodePort, newService.Spec.HealthCheckNodePort)
		return true
	}

	return false
}

func (s *ServiceController) loadBalancerName(service *v1.Service) string {
	return s.balancer.GetLoadBalancerName(context.TODO(), "", service)
}

func getPortsForLB(service *v1.Service) ([]*v1.ServicePort, error) {
	var protocol v1.Protocol

	ports := []*v1.ServicePort{}
	for i := range service.Spec.Ports {
		sp := &service.Spec.Ports[i]
		// The check on protocol was removed here. The cloud provider itself is now responsible for all protocol validation
		ports = append(ports, sp)
		if protocol == "" {
			protocol = sp.Protocol
		} else if protocol != sp.Protocol && wantsLoadBalancer(service) {
			// TODO: Convert error messages to use event recorder
			return nil, fmt.Errorf("mixed protocol external load balancers are not supported")
		}
	}
	return ports, nil
}

func portsEqualForLB(x, y *v1.Service) bool {
	xPorts, err := getPortsForLB(x)
	if err != nil {
		return false
	}
	yPorts, err := getPortsForLB(y)
	if err != nil {
		return false
	}
	return portSlicesEqualForLB(xPorts, yPorts)
}

func portSlicesEqualForLB(x, y []*v1.ServicePort) bool {
	if len(x) != len(y) {
		return false
	}

	for i := range x {
		if !portEqualForLB(x[i], y[i]) {
			return false
		}
	}
	return true
}

func portEqualForLB(x, y *v1.ServicePort) bool {
	// TODO: Should we check name? (In theory, an LB could expose it)
	if x.Name != y.Name {
		return false
	}

	if x.Protocol != y.Protocol {
		return false
	}

	if x.Port != y.Port {
		return false
	}

	if x.NodePort != y.NodePort {
		return false
	}

	// We don't check TargetPort; that is not relevant for load balancing
	// TODO: Should we blank it out? Or just check it anyway?

	return true
}
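
// Illustrative sketch (not part of the vendored source): per the checks above,
// two ports compare equal only when Name, Protocol, Port and NodePort all
// match, while TargetPort is deliberately ignored, e.g.
//
//	a := &v1.ServicePort{Name: "http", Protocol: v1.ProtocolTCP, Port: 80, NodePort: 30080}
//	b := &v1.ServicePort{Name: "http", Protocol: v1.ProtocolTCP, Port: 80, NodePort: 30081}
//	_ = portEqualForLB(a, b) // false: NodePort differs
//
// The concrete port values are assumptions chosen only for illustration.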

func nodeNames(nodes []*v1.Node) sets.String {
	ret := sets.NewString()
	for _, node := range nodes {
		ret.Insert(node.Name)
	}
	return ret
}

func nodeSlicesEqualForLB(x, y []*v1.Node) bool {
	if len(x) != len(y) {
		return false
	}
	return nodeNames(x).Equal(nodeNames(y))
}

func getNodeConditionPredicate() corelisters.NodeConditionPredicate {
	return func(node *v1.Node) bool {
		// We add the master to the node list, but it's unschedulable. So we use this to filter
		// the master.
		if node.Spec.Unschedulable {
			return false
		}

		// As of 1.6, we will taint the master, but not necessarily mark it unschedulable.
		// Recognize nodes labeled as master, and filter them also, as we were doing previously.
		if _, hasMasterRoleLabel := node.Labels[LabelNodeRoleMaster]; hasMasterRoleLabel {
			return false
		}

		if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.ServiceNodeExclusion) {
			if _, hasExcludeBalancerLabel := node.Labels[LabelNodeRoleExcludeBalancer]; hasExcludeBalancerLabel {
				return false
			}
		}

		// If we have no info, don't accept
		if len(node.Status.Conditions) == 0 {
			return false
		}
		for _, cond := range node.Status.Conditions {
			// We consider the node for load balancing only when its NodeReady condition status
			// is ConditionTrue
			if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
				klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
				return false
			}
		}
		return true
	}
}
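
// Illustrative sketch (not part of the vendored source): the predicate rejects
// unschedulable nodes outright, before any condition is consulted, e.g.
//
//	cordoned := &v1.Node{Spec: v1.NodeSpec{Unschedulable: true}}
//	_ = getNodeConditionPredicate()(cordoned) // false
//
// The node literal is an assumption chosen only for illustration.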

// nodeSyncLoop handles updating the hosts pointed to by all load
// balancers whenever the set of nodes in the cluster changes.
func (s *ServiceController) nodeSyncLoop() {
	newHosts, err := s.nodeLister.ListWithPredicate(getNodeConditionPredicate())
	if err != nil {
		klog.Errorf("Failed to retrieve current set of nodes from node lister: %v", err)
		return
	}
	if nodeSlicesEqualForLB(newHosts, s.knownHosts) {
		// The set of nodes in the cluster hasn't changed, but we can retry
		// updating any services that we failed to update last time around.
		s.servicesToUpdate = s.updateLoadBalancerHosts(s.servicesToUpdate, newHosts)
		return
	}

	klog.Infof("Detected change in list of current cluster nodes. New node set: %v",
		nodeNames(newHosts))

	// Try updating all services, and save the ones that fail to try again next
	// round.
	s.servicesToUpdate = s.cache.allServices()
	numServices := len(s.servicesToUpdate)
	s.servicesToUpdate = s.updateLoadBalancerHosts(s.servicesToUpdate, newHosts)
	klog.Infof("Successfully updated %d out of %d load balancers to direct traffic to the updated set of nodes",
		numServices-len(s.servicesToUpdate), numServices)

	s.knownHosts = newHosts
}

// updateLoadBalancerHosts updates all existing load balancers so that
// they will match the list of hosts provided.
// Returns the list of services that couldn't be updated.
func (s *ServiceController) updateLoadBalancerHosts(services []*v1.Service, hosts []*v1.Node) (servicesToRetry []*v1.Service) {
	for _, service := range services {
		func() {
			if service == nil {
				return
			}
			if err := s.lockedUpdateLoadBalancerHosts(service, hosts); err != nil {
				klog.Errorf("External error while updating load balancer: %v.", err)
				servicesToRetry = append(servicesToRetry, service)
			}
		}()
	}
	return servicesToRetry
}

// Updates the load balancer of a service, assuming we hold the mutex
// associated with the service.
func (s *ServiceController) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts []*v1.Node) error {
	if !wantsLoadBalancer(service) {
		return nil
	}

	// This operation doesn't normally take very long (and happens pretty often), so we only record the final event
	err := s.balancer.UpdateLoadBalancer(context.TODO(), s.clusterName, service, hosts)
	if err == nil {
		// If there are no available nodes for LoadBalancer service, make an EventTypeWarning event for it.
		if len(hosts) == 0 {
			s.eventRecorder.Eventf(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer service %s/%s", service.Namespace, service.Name)
		} else {
			s.eventRecorder.Event(service, v1.EventTypeNormal, "UpdatedLoadBalancer", "Updated load balancer with new hosts")
		}
		return nil
	}

	// It's only an actual error if the load balancer still exists.
	if _, exists, err := s.balancer.GetLoadBalancer(context.TODO(), s.clusterName, service); err != nil {
		klog.Errorf("External error while checking if load balancer %q exists: %v", s.balancer.GetLoadBalancerName(context.TODO(), s.clusterName, service), err)
	} else if !exists {
		return nil
	}

	s.eventRecorder.Eventf(service, v1.EventTypeWarning, "LoadBalancerUpdateFailed", "Error updating load balancer with new hosts %v: %v", nodeNames(hosts), err)
	return err
}

func wantsLoadBalancer(service *v1.Service) bool {
	return service.Spec.Type == v1.ServiceTypeLoadBalancer
}

func loadBalancerIPsAreEqual(oldService, newService *v1.Service) bool {
	return oldService.Spec.LoadBalancerIP == newService.Spec.LoadBalancerIP
}

// syncService will sync the Service with the given key, creating or cleaning
// up its load balancer as needed. This function is not meant to be invoked
// concurrently with the same key.
func (s *ServiceController) syncService(key string) error {
	startTime := time.Now()
	var cachedService *cachedService
	defer func() {
		klog.V(4).Infof("Finished syncing service %q (%v)", key, time.Since(startTime))
	}()

	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}

	// service holds the latest service info from apiserver
	service, err := s.serviceLister.Services(namespace).Get(name)
	switch {
	case errors.IsNotFound(err):
		// service absence in store means watcher caught the deletion, ensure LB info is cleaned
		klog.Infof("Service has been deleted %v. Attempting to cleanup load balancer resources", key)
		err = s.processServiceDeletion(key)
	case err != nil:
		klog.Infof("Unable to retrieve service %v from store: %v", key, err)
	default:
		cachedService = s.cache.getOrCreate(key)
		err = s.processServiceUpdate(cachedService, service, key)
	}

	return err
}

// processServiceDeletion cleans up the load balancer of a deleted service.
// Returns an error if processing the deletion failed; the work queue's rate
// limiter decides when a failed key is retried.
func (s *ServiceController) processServiceDeletion(key string) error {
	cachedService, ok := s.cache.get(key)
	if !ok {
		klog.Errorf("service %s not in cache even though the watcher thought it was. Ignoring the deletion", key)
		return nil
	}
	return s.processLoadBalancerDelete(cachedService, key)
}

func (s *ServiceController) processLoadBalancerDelete(cachedService *cachedService, key string) error {
	service := cachedService.state
	// delete load balancer info only if the service type is LoadBalancer
	if !wantsLoadBalancer(service) {
		return nil
	}
	s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletingLoadBalancer", "Deleting load balancer")
	err := s.balancer.EnsureLoadBalancerDeleted(context.TODO(), s.clusterName, service)
	if err != nil {
		s.eventRecorder.Eventf(service, v1.EventTypeWarning, "DeletingLoadBalancerFailed", "Error deleting load balancer (will retry): %v", err)
		return err
	}
	s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted load balancer")
	s.cache.delete(key)

	return nil
}
293
vendor/k8s.io/kubernetes/pkg/controller/util/node/controller_utils.go
generated
vendored
Normal file
@ -0,0 +1,293 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"fmt"
	"strings"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/types"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"

	"k8s.io/api/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	appsv1listers "k8s.io/client-go/listers/apps/v1"
	api "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
	nodepkg "k8s.io/kubernetes/pkg/util/node"

	"k8s.io/klog"
)

// DeletePods deletes, on behalf of the master, all pods running on the given
// node, and returns true if any pods were deleted or were found pending
// deletion.
func DeletePods(kubeClient clientset.Interface, recorder record.EventRecorder, nodeName, nodeUID string, daemonStore appsv1listers.DaemonSetLister) (bool, error) {
	remaining := false
	selector := fields.OneTermEqualSelector(api.PodHostField, nodeName).String()
	options := metav1.ListOptions{FieldSelector: selector}
	pods, err := kubeClient.CoreV1().Pods(metav1.NamespaceAll).List(options)
	var updateErrList []error

	if err != nil {
		return remaining, err
	}

	if len(pods.Items) > 0 {
		RecordNodeEvent(recorder, nodeName, nodeUID, v1.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
	}

	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}

		// Set reason and message in the pod object.
		if _, err = SetPodTerminationReason(kubeClient, &pod, nodeName); err != nil {
			if apierrors.IsConflict(err) {
				updateErrList = append(updateErrList,
					fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err))
				continue
			}
		}
		// If the pod has already been marked for deletion, we still count it as remaining.
		if pod.DeletionGracePeriodSeconds != nil {
			remaining = true
			continue
		}
		// if the pod is managed by a daemonset, ignore it
		_, err := daemonStore.GetPodDaemonSets(&pod)
		if err == nil { // No error means at least one daemonset was found
			continue
		}

		klog.V(2).Infof("Starting deletion of pod %v/%v", pod.Namespace, pod.Name)
		recorder.Eventf(&pod, v1.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
		if err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
			return false, err
		}
		remaining = true
	}

	if len(updateErrList) > 0 {
		return false, utilerrors.NewAggregate(updateErrList)
	}
	return remaining, nil
}

// SetPodTerminationReason attempts to set a reason and message in the
// pod status, updates it in the apiserver, and returns an error if it
// encounters one.
func SetPodTerminationReason(kubeClient clientset.Interface, pod *v1.Pod, nodeName string) (*v1.Pod, error) {
	if pod.Status.Reason == nodepkg.NodeUnreachablePodReason {
		return pod, nil
	}

	pod.Status.Reason = nodepkg.NodeUnreachablePodReason
	pod.Status.Message = fmt.Sprintf(nodepkg.NodeUnreachablePodMessage, nodeName, pod.Name)

	var updatedPod *v1.Pod
	var err error
	if updatedPod, err = kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
		return nil, err
	}
	return updatedPod, nil
}

// MarkAllPodsNotReady updates, on behalf of the master, the ready status of
// all pods running on the given node to false, returning an aggregate error
// if any status update fails.
func MarkAllPodsNotReady(kubeClient clientset.Interface, node *v1.Node) error {
	nodeName := node.Name
	klog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
	opts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName).String()}
	pods, err := kubeClient.CoreV1().Pods(metav1.NamespaceAll).List(opts)
	if err != nil {
		return err
	}

	errMsg := []string{}
	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}

		for i, cond := range pod.Status.Conditions {
			if cond.Type == v1.PodReady {
				pod.Status.Conditions[i].Status = v1.ConditionFalse
				klog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
				_, err := kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(&pod)
				if err != nil {
					klog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
					errMsg = append(errMsg, fmt.Sprintf("%v", err))
				}
				break
			}
		}
	}
	if len(errMsg) == 0 {
		return nil
	}
	return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}

// RecordNodeEvent records an event related to a node.
func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
	ref := &v1.ObjectReference{
		Kind:      "Node",
		Name:      nodeName,
		UID:       types.UID(nodeUID),
		Namespace: "",
	}
	klog.V(2).Infof("Recording %s event message for node %s", event, nodeName)
	recorder.Eventf(ref, eventtype, reason, "Node %s event: %s", nodeName, event)
}

// RecordNodeStatusChange records an event related to a node status change. (Common to lifecycle and ipam)
func RecordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, newStatus string) {
	ref := &v1.ObjectReference{
		Kind:      "Node",
		Name:      node.Name,
		UID:       node.UID,
		Namespace: "",
	}
	klog.V(2).Infof("Recording status change %s event message for node %s", newStatus, node.Name)
	// TODO: This requires a transaction, either both node status is updated
	// and event is recorded or neither should happen, see issue #6055.
	recorder.Eventf(ref, v1.EventTypeNormal, newStatus, "Node %s status is now: %s", node.Name, newStatus)
}

// SwapNodeControllerTaint adds the given taints to the node and removes the
// given taints from it, returning true in case of success and false
// otherwise.
func SwapNodeControllerTaint(kubeClient clientset.Interface, taintsToAdd, taintsToRemove []*v1.Taint, node *v1.Node) bool {
	for _, taintToAdd := range taintsToAdd {
		now := metav1.Now()
		taintToAdd.TimeAdded = &now
	}

	err := controller.AddOrUpdateTaintOnNode(kubeClient, node.Name, taintsToAdd...)
	if err != nil {
		utilruntime.HandleError(
			fmt.Errorf(
				"unable to taint %+v unresponsive Node %q: %v",
				taintsToAdd,
				node.Name,
				err))
		return false
	}
	klog.V(4).Infof("Added %+v Taint to Node %v", taintsToAdd, node.Name)

	err = controller.RemoveTaintOffNode(kubeClient, node.Name, node, taintsToRemove...)
	if err != nil {
		utilruntime.HandleError(
			fmt.Errorf(
				"unable to remove %+v unneeded taint from unresponsive Node %q: %v",
				taintsToRemove,
				node.Name,
				err))
		return false
	}
	klog.V(4).Infof("Made sure that Node %+v has no %v Taint", node.Name, taintsToRemove)

	return true
}

// AddOrUpdateLabelsOnNode updates the labels on the node and returns true on
// success and false on failure.
func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, labelsToUpdate map[string]string, node *v1.Node) bool {
	err := controller.AddOrUpdateLabelsOnNode(kubeClient, node.Name, labelsToUpdate)
	if err != nil {
		utilruntime.HandleError(
			fmt.Errorf(
				"unable to update labels %+v for Node %q: %v",
				labelsToUpdate,
				node.Name,
				err))
		return false
	}
	klog.V(4).Infof("Updated labels %+v to Node %v", labelsToUpdate, node.Name)
	return true
}

// CreateAddNodeHandler creates an add node handler.
func CreateAddNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
	return func(originalObj interface{}) {
		node := originalObj.(*v1.Node).DeepCopy()
		if err := f(node); err != nil {
			utilruntime.HandleError(fmt.Errorf("Error while processing Node Add: %v", err))
		}
	}
}

// CreateUpdateNodeHandler creates a node update handler. (Common to lifecycle and ipam)
func CreateUpdateNodeHandler(f func(oldNode, newNode *v1.Node) error) func(oldObj, newObj interface{}) {
	return func(origOldObj, origNewObj interface{}) {
		node := origNewObj.(*v1.Node).DeepCopy()
		prevNode := origOldObj.(*v1.Node).DeepCopy()

		if err := f(prevNode, node); err != nil {
			utilruntime.HandleError(fmt.Errorf("Error while processing Node Update: %v", err))
		}
	}
}

// CreateDeleteNodeHandler creates a delete node handler. (Common to lifecycle and ipam)
func CreateDeleteNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
	return func(originalObj interface{}) {
		originalNode, isNode := originalObj.(*v1.Node)
		// We can get DeletedFinalStateUnknown instead of *v1.Node here and
		// we need to handle that correctly. #34692
		if !isNode {
			deletedState, ok := originalObj.(cache.DeletedFinalStateUnknown)
			if !ok {
				klog.Errorf("Received unexpected object: %v", originalObj)
				return
			}
			originalNode, ok = deletedState.Obj.(*v1.Node)
			if !ok {
				klog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
				return
			}
		}
		node := originalNode.DeepCopy()
		if err := f(node); err != nil {
			utilruntime.HandleError(fmt.Errorf("Error while processing Node Delete: %v", err))
		}
	}
}
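
// Illustrative sketch (not part of the vendored source): these factories are
// meant to be handed to a node informer's event handler registration, e.g.
//
//	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
//		AddFunc:    CreateAddNodeHandler(onAdd),
//		UpdateFunc: CreateUpdateNodeHandler(onUpdate),
//		DeleteFunc: CreateDeleteNodeHandler(onDelete),
//	})
//
// where nodeInformer and the onAdd/onUpdate/onDelete callbacks are assumptions
// chosen only for illustration.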

// GetNodeCondition extracts the provided condition from the given status.
// Returns -1 and nil if the condition is not present, otherwise the index of
// the located condition and the condition itself.
func GetNodeCondition(status *v1.NodeStatus, conditionType v1.NodeConditionType) (int, *v1.NodeCondition) {
	if status == nil {
		return -1, nil
	}
	for i := range status.Conditions {
		if status.Conditions[i].Type == conditionType {
			return i, &status.Conditions[i]
		}
	}
	return -1, nil
}
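
// Illustrative sketch (not part of the vendored source): a typical lookup of
// the NodeReady condition, with -1 signalling absence, e.g.
//
//	idx, readyCond := GetNodeCondition(&node.Status, v1.NodeReady)
//	if idx == -1 {
//		// the kubelet has not reported a Ready condition yet
//	}
//	_ = readyCond
//
// The variable names are assumptions chosen only for illustration.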
34
vendor/k8s.io/kubernetes/pkg/controller/volume/events/event.go
generated
vendored
Normal file
@ -0,0 +1,34 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package events

const (
	// Volume-relevant event reasons.
	FailedBinding             = "FailedBinding"
	VolumeMismatch            = "VolumeMismatch"
	VolumeFailedRecycle       = "VolumeFailedRecycle"
	VolumeRecycled            = "VolumeRecycled"
	RecyclerPod               = "RecyclerPod"
	VolumeDelete              = "VolumeDelete"
	VolumeFailedDelete        = "VolumeFailedDelete"
	ExternalProvisioning      = "ExternalProvisioning"
	ProvisioningFailed        = "ProvisioningFailed"
	ProvisioningCleanupFailed = "ProvisioningCleanupFailed"
	ProvisioningSucceeded     = "ProvisioningSucceeded"
	WaitForFirstConsumer      = "WaitForFirstConsumer"
	ExternalExpanding         = "ExternalExpanding"
)
380
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/index.go
generated
vendored
Normal file
@ -0,0 +1,380 @@
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"
	"sort"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/tools/cache"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
)

// persistentVolumeOrderedIndex is a cache.Store that keeps persistent volumes
// indexed by AccessModes and ordered by storage capacity.
type persistentVolumeOrderedIndex struct {
	store cache.Indexer
}

func newPersistentVolumeOrderedIndex() persistentVolumeOrderedIndex {
	return persistentVolumeOrderedIndex{cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{"accessmodes": accessModesIndexFunc})}
}

// accessModesIndexFunc is an indexing function that returns a persistent
// volume's AccessModes as a string
func accessModesIndexFunc(obj interface{}) ([]string, error) {
	if pv, ok := obj.(*v1.PersistentVolume); ok {
		modes := v1helper.GetAccessModesAsString(pv.Spec.AccessModes)
		return []string{modes}, nil
	}
	return []string{""}, fmt.Errorf("object is not a persistent volume: %v", obj)
}

// listByAccessModes returns all volumes with the given set of
// AccessModeTypes. The list is unsorted!
func (pvIndex *persistentVolumeOrderedIndex) listByAccessModes(modes []v1.PersistentVolumeAccessMode) ([]*v1.PersistentVolume, error) {
	pv := &v1.PersistentVolume{
		Spec: v1.PersistentVolumeSpec{
			AccessModes: modes,
		},
	}

	objs, err := pvIndex.store.Index("accessmodes", pv)
	if err != nil {
		return nil, err
	}

	volumes := make([]*v1.PersistentVolume, len(objs))
	for i, obj := range objs {
		volumes[i] = obj.(*v1.PersistentVolume)
	}

	return volumes, nil
}

// findByClaim returns the nearest matching PV from the ordered list, or nil
// if no match is found
func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
	// PVs are indexed by their access modes to allow easier searching. Each
	// index is the string representation of a set of access modes. There is a
	// finite number of possible sets and PVs will only be indexed in one of
	// them (whichever index matches the PV's modes).
	//
	// A request for resources will always specify its desired access modes.
	// Any matching PV must have at least that number of access modes, but it
	// can have more. For example, a user asks for ReadWriteOnce but a GCEPD
	// is available, which is ReadWriteOnce+ReadOnlyMany.
	//
	// Searches are performed against a set of access modes, so we can attempt
	// not only the exact matching modes but also potential matches (the GCEPD
	// example above).
	allPossibleModes := pvIndex.allPossibleMatchingAccessModes(claim.Spec.AccessModes)

	for _, modes := range allPossibleModes {
		volumes, err := pvIndex.listByAccessModes(modes)
		if err != nil {
			return nil, err
		}

		bestVol, err := findMatchingVolume(claim, volumes, nil /* node for topology binding*/, nil /* exclusion map */, delayBinding)
		if err != nil {
			return nil, err
		}

		if bestVol != nil {
			return bestVol, nil
		}
	}
	return nil, nil
}

// findMatchingVolume goes through the list of volumes to find the best matching volume
// for the claim.
//
// This function is used by both the PV controller and scheduler.
//
// delayBinding is true only in the PV controller path. When set, prebound PVs are still returned
// as a match for the claim, but unbound PVs are skipped.
//
// node is set only in the scheduler path. When set, the PV node affinity is checked against
// the node's labels.
//
// excludedVolumes is only used in the scheduler path, and is needed for evaluating multiple
// unbound PVCs for a single Pod at one time. As each PVC finds a matching PV, the chosen
// PV needs to be excluded from future matching.
func findMatchingVolume(
	claim *v1.PersistentVolumeClaim,
	volumes []*v1.PersistentVolume,
	node *v1.Node,
	excludedVolumes map[string]*v1.PersistentVolume,
	delayBinding bool) (*v1.PersistentVolume, error) {

	var smallestVolume *v1.PersistentVolume
	var smallestVolumeQty resource.Quantity
	requestedQty := claim.Spec.Resources.Requests[v1.ResourceName(v1.ResourceStorage)]
	requestedClass := v1helper.GetPersistentVolumeClaimClass(claim)

	var selector labels.Selector
	if claim.Spec.Selector != nil {
		internalSelector, err := metav1.LabelSelectorAsSelector(claim.Spec.Selector)
		if err != nil {
			// should be unreachable code due to validation
			return nil, fmt.Errorf("error creating internal label selector for claim: %v: %v", claimToClaimKey(claim), err)
		}
		selector = internalSelector
	}

	// Go through all available volumes with two goals:
	// - find a volume that is either pre-bound by user or dynamically
	//   provisioned for this claim. Because of this we need to loop through
	//   all volumes.
	// - find the smallest matching one if there is no volume pre-bound to
	//   the claim.
	for _, volume := range volumes {
		if _, ok := excludedVolumes[volume.Name]; ok {
			// Skip volumes in the excluded list
			continue
		}

		volumeQty := volume.Spec.Capacity[v1.ResourceStorage]

		// check if volumeModes do not match (feature gate protected)
		isMismatch, err := checkVolumeModeMismatches(&claim.Spec, &volume.Spec)
		if err != nil {
			return nil, fmt.Errorf("error checking if volumeMode was a mismatch: %v", err)
		}
		// filter out mismatching volumeModes
		if isMismatch {
			continue
		}

		// check if PV's DeletionTimeStamp is set, if so, skip this volume.
		if utilfeature.DefaultFeatureGate.Enabled(features.StorageObjectInUseProtection) {
			if volume.ObjectMeta.DeletionTimestamp != nil {
				continue
			}
		}

		nodeAffinityValid := true
		if node != nil {
			// Scheduler path, check that the PV NodeAffinity
			// is satisfied by the node
			err := volumeutil.CheckNodeAffinity(volume, node.Labels)
			if err != nil {
				nodeAffinityValid = false
			}
		}

		if IsVolumeBoundToClaim(volume, claim) {
			// this claim and volume are pre-bound; return
			// the volume if the size request is satisfied,
			// otherwise continue searching for a match
			if volumeQty.Cmp(requestedQty) < 0 {
				continue
			}

			// If PV node affinity is invalid, return no match.
			// This means the prebound PV (and therefore PVC)
			// is not suitable for this node.
			if !nodeAffinityValid {
				return nil, nil
			}

			return volume, nil
		}

		if node == nil && delayBinding {
			// PV controller does not bind this claim.
			// Scheduler will handle binding unbound volumes
			// Scheduler path will have node != nil
			continue
		}

		// filter out:
		// - volumes in non-available phase
		// - volumes bound to another claim
		// - volumes whose labels don't match the claim's selector, if specified
		// - volumes in Class that is not requested
		// - volumes whose NodeAffinity does not match the node
		if volume.Status.Phase != v1.VolumeAvailable {
			// We ignore volumes in non-available phase, because volumes that
			// satisfy the matching criteria will be updated to available, and
			// binding them now has a high chance of encountering unnecessary
			// failures due to API conflicts.
			continue
		} else if volume.Spec.ClaimRef != nil {
			continue
		} else if selector != nil && !selector.Matches(labels.Set(volume.Labels)) {
			continue
		}
		if v1helper.GetPersistentVolumeClass(volume) != requestedClass {
			continue
		}
		if !nodeAffinityValid {
			continue
		}

		if node != nil {
			// Scheduler path
			// Check that the access modes match
			if !checkAccessModes(claim, volume) {
				continue
			}
		}

		if volumeQty.Cmp(requestedQty) >= 0 {
			if smallestVolume == nil || smallestVolumeQty.Cmp(volumeQty) > 0 {
				smallestVolume = volume
				smallestVolumeQty = volumeQty
			}
		}
	}

	if smallestVolume != nil {
		// Found a matching volume
		return smallestVolume, nil
	}

	return nil, nil
}

// checkVolumeModeMismatches is a convenience method that checks volumeMode for PersistentVolume
// and PersistentVolumeClaims
func checkVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) (bool, error) {
	if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
		return false, nil
	}

	// In HA upgrades, we cannot guarantee that the apiserver is on a version >= controller-manager.
	// So we default a nil volumeMode to filesystem
	requestedVolumeMode := v1.PersistentVolumeFilesystem
	if pvcSpec.VolumeMode != nil {
		requestedVolumeMode = *pvcSpec.VolumeMode
	}
	pvVolumeMode := v1.PersistentVolumeFilesystem
	if pvSpec.VolumeMode != nil {
		pvVolumeMode = *pvSpec.VolumeMode
	}
	return requestedVolumeMode != pvVolumeMode, nil
}
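
// Illustrative sketch (not part of the vendored source): with the BlockVolume
// gate enabled, a nil VolumeMode on either side defaults to Filesystem, so
// only a Block/Filesystem split reports a mismatch, e.g.
//
//	block := v1.PersistentVolumeBlock
//	mismatch, _ := checkVolumeModeMismatches(
//		&v1.PersistentVolumeClaimSpec{VolumeMode: &block},
//		&v1.PersistentVolumeSpec{}) // nil defaults to Filesystem
//	// mismatch == true
//
// The claim and volume specs are assumptions chosen only for illustration.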

// findBestMatchForClaim is a convenience method that finds a volume by the claim's AccessModes and requests for Storage
func (pvIndex *persistentVolumeOrderedIndex) findBestMatchForClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
	return pvIndex.findByClaim(claim, delayBinding)
}

// allPossibleMatchingAccessModes returns an array of AccessMode arrays that
// can satisfy a user's requested modes.
//
// see comments in the findByClaim func above regarding indexing.
//
// allPossibleMatchingAccessModes gets all stringified accessmodes from the
// index and returns all those that contain at least all of the requested
// modes.
//
// For example, assume the index contains 2 types of PVs where the stringified
// accessmodes are:
//
//	"RWO,ROX" -- some number of GCEPDs
//	"RWO,ROX,RWX" -- some number of NFS volumes
//
// A request for RWO could be satisfied by both sets of indexed volumes, so
// allPossibleMatchingAccessModes returns:
//
//	[][]v1.PersistentVolumeAccessMode {
//		[]v1.PersistentVolumeAccessMode {
//			v1.ReadWriteOnce, v1.ReadOnlyMany,
//		},
//		[]v1.PersistentVolumeAccessMode {
//			v1.ReadWriteOnce, v1.ReadOnlyMany, v1.ReadWriteMany,
//		},
//	}
//
// A request for RWX can be satisfied by only one set of indexed volumes, so
// the return is:
//
//	[][]v1.PersistentVolumeAccessMode {
//		[]v1.PersistentVolumeAccessMode {
//			v1.ReadWriteOnce, v1.ReadOnlyMany, v1.ReadWriteMany,
//		},
//	}
//
// This func returns modes with ascending levels of modes to give the user
// what is closest to what they actually asked for.
func (pvIndex *persistentVolumeOrderedIndex) allPossibleMatchingAccessModes(requestedModes []v1.PersistentVolumeAccessMode) [][]v1.PersistentVolumeAccessMode {
	matchedModes := [][]v1.PersistentVolumeAccessMode{}
	keys := pvIndex.store.ListIndexFuncValues("accessmodes")
	for _, key := range keys {
		indexedModes := v1helper.GetAccessModesFromString(key)
		if volumeutil.AccessModesContainedInAll(indexedModes, requestedModes) {
			matchedModes = append(matchedModes, indexedModes)
		}
	}

	// sort by the number of modes in each array with the fewest number of
	// modes coming first. this allows searching for volumes by the minimum
	// number of modes required of the possible matches.
	sort.Sort(byAccessModes{matchedModes})
	return matchedModes
}

// byAccessModes is used to order access modes by size, with the fewest modes first
type byAccessModes struct {
	modes [][]v1.PersistentVolumeAccessMode
}

func (c byAccessModes) Less(i, j int) bool {
	return len(c.modes[i]) < len(c.modes[j])
}

func (c byAccessModes) Swap(i, j int) {
	c.modes[i], c.modes[j] = c.modes[j], c.modes[i]
}

func (c byAccessModes) Len() int {
	return len(c.modes)
}

func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
	return fmt.Sprintf("%s/%s", claim.Namespace, claim.Name)
}

func claimrefToClaimKey(claimref *v1.ObjectReference) string {
	return fmt.Sprintf("%s/%s", claimref.Namespace, claimref.Name)
}

// checkAccessModes returns true if the PV satisfies all the PVC's requested AccessModes
func checkAccessModes(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolume) bool {
	pvModesMap := map[v1.PersistentVolumeAccessMode]bool{}
	for _, mode := range volume.Spec.AccessModes {
		pvModesMap[mode] = true
	}

	for _, mode := range claim.Spec.AccessModes {
		_, ok := pvModesMap[mode]
		if !ok {
			return false
		}
	}
	return true
}
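
// Illustrative sketch (not part of the vendored source): a PV advertising
// RWO+ROX satisfies a claim that requests only RWO, but not one that also
// requests RWX, e.g.
//
//	pv := &v1.PersistentVolume{Spec: v1.PersistentVolumeSpec{
//		AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce, v1.ReadOnlyMany},
//	}}
//	pvc := &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{
//		AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce},
//	}}
//	_ = checkAccessModes(pvc, pv) // true
//
// The object literals are assumptions chosen only for illustration.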
211
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics/metrics.go
generated
vendored
Normal file
@ -0,0 +1,211 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"sync"

	"k8s.io/api/core/v1"

	"github.com/prometheus/client_golang/prometheus"
	"k8s.io/klog"
)

const (
	// Subsystem names.
	pvControllerSubsystem = "pv_collector"

	// Metric names.
	boundPVKey    = "bound_pv_count"
	unboundPVKey  = "unbound_pv_count"
	boundPVCKey   = "bound_pvc_count"
	unboundPVCKey = "unbound_pvc_count"

	// Label names.
	namespaceLabel    = "namespace"
	storageClassLabel = "storage_class"
)

var registerMetrics sync.Once

// PVLister used to list persistent volumes.
type PVLister interface {
	List() []interface{}
}

// PVCLister used to list persistent volume claims.
type PVCLister interface {
	List() []interface{}
}

// Register all metrics for pv controller.
func Register(pvLister PVLister, pvcLister PVCLister) {
	registerMetrics.Do(func() {
		prometheus.MustRegister(newPVAndPVCCountCollector(pvLister, pvcLister))
		prometheus.MustRegister(volumeOperationMetric)
		prometheus.MustRegister(volumeOperationErrorsMetric)
	})
}

func newPVAndPVCCountCollector(pvLister PVLister, pvcLister PVCLister) *pvAndPVCCountCollector {
	return &pvAndPVCCountCollector{pvLister, pvcLister}
}

// pvAndPVCCountCollector is a custom collector for current PV and PVC counts.
type pvAndPVCCountCollector struct {
	// Cache for accessing information about PersistentVolumes.
	pvLister PVLister
	// Cache for accessing information about PersistentVolumeClaims.
	pvcLister PVCLister
}

var (
	boundPVCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, boundPVKey),
		"Gauge measuring number of persistent volumes currently bound",
		[]string{storageClassLabel}, nil)
	unboundPVCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, unboundPVKey),
		"Gauge measuring number of persistent volumes currently unbound",
		[]string{storageClassLabel}, nil)

	boundPVCCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, boundPVCKey),
		"Gauge measuring number of persistent volume claims currently bound",
		[]string{namespaceLabel}, nil)
	unboundPVCCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, unboundPVCKey),
		"Gauge measuring number of persistent volume claims currently unbound",
		[]string{namespaceLabel}, nil)

	volumeOperationMetric = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "volume_operation_total_seconds",
			Help: "Total volume operation time",
		},
		[]string{"plugin_name", "operation_name"})
	volumeOperationErrorsMetric = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "volume_operation_total_errors",
			Help: "Total volume operation errors",
		},
		[]string{"plugin_name", "operation_name"})
)

func (collector *pvAndPVCCountCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- boundPVCountDesc
	ch <- unboundPVCountDesc
	ch <- boundPVCCountDesc
	ch <- unboundPVCCountDesc
}

func (collector *pvAndPVCCountCollector) Collect(ch chan<- prometheus.Metric) {
	collector.pvCollect(ch)
	collector.pvcCollect(ch)
}

func (collector *pvAndPVCCountCollector) pvCollect(ch chan<- prometheus.Metric) {
	boundNumberByStorageClass := make(map[string]int)
	unboundNumberByStorageClass := make(map[string]int)
	for _, pvObj := range collector.pvLister.List() {
		pv, ok := pvObj.(*v1.PersistentVolume)
		if !ok {
			continue
		}
		if pv.Status.Phase == v1.VolumeBound {
			boundNumberByStorageClass[pv.Spec.StorageClassName]++
		} else {
			unboundNumberByStorageClass[pv.Spec.StorageClassName]++
		}
	}
	for storageClassName, number := range boundNumberByStorageClass {
		metric, err := prometheus.NewConstMetric(
			boundPVCountDesc,
			prometheus.GaugeValue,
			float64(number),
			storageClassName)
		if err != nil {
			klog.Warningf("Create bound pv number metric failed: %v", err)
			continue
		}
		ch <- metric
	}
	for storageClassName, number := range unboundNumberByStorageClass {
		metric, err := prometheus.NewConstMetric(
			unboundPVCountDesc,
			prometheus.GaugeValue,
			float64(number),
			storageClassName)
		if err != nil {
			klog.Warningf("Create unbound pv number metric failed: %v", err)
			continue
		}
		ch <- metric
	}
}

func (collector *pvAndPVCCountCollector) pvcCollect(ch chan<- prometheus.Metric) {
	boundNumberByNamespace := make(map[string]int)
	unboundNumberByNamespace := make(map[string]int)
	for _, pvcObj := range collector.pvcLister.List() {
		pvc, ok := pvcObj.(*v1.PersistentVolumeClaim)
		if !ok {
			continue
		}
		if pvc.Status.Phase == v1.ClaimBound {
			boundNumberByNamespace[pvc.Namespace]++
		} else {
			unboundNumberByNamespace[pvc.Namespace]++
		}
	}
	for namespace, number := range boundNumberByNamespace {
		metric, err := prometheus.NewConstMetric(
			boundPVCCountDesc,
			prometheus.GaugeValue,
			float64(number),
			namespace)
		if err != nil {
			klog.Warningf("Create bound pvc number metric failed: %v", err)
			continue
		}
		ch <- metric
	}
	for namespace, number := range unboundNumberByNamespace {
		metric, err := prometheus.NewConstMetric(
			unboundPVCCountDesc,
			prometheus.GaugeValue,
			float64(number),
			namespace)
		if err != nil {
			klog.Warningf("Create unbound pvc number metric failed: %v", err)
			continue
		}
		ch <- metric
	}
}

// RecordVolumeOperationMetric records the latency and errors of volume operations.
func RecordVolumeOperationMetric(pluginName, opName string, timeTaken float64, err error) {
	if pluginName == "" {
		pluginName = "N/A"
	}
	if err != nil {
		volumeOperationErrorsMetric.WithLabelValues(pluginName, opName).Inc()
		return
	}
	volumeOperationMetric.WithLabelValues(pluginName, opName).Observe(timeTaken)
}
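
// Illustrative sketch (not part of the vendored source): callers time the
// operation themselves and report the result, letting the helper pick the
// error counter or the latency histogram, e.g.
//
//	start := time.Now()
//	err := provisionVolume() // hypothetical operation
//	RecordVolumeOperationMetric("kubernetes.io/gce-pd", "provision",
//		time.Since(start).Seconds(), err)
//
// The plugin name, operation name and provisionVolume helper are assumptions
// chosen only for illustration.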
1695
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/pv_controller.go
generated
vendored
Normal file
File diff suppressed because it is too large
523
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/pv_controller_base.go
generated
vendored
Normal file
@ -0,0 +1,523 @@
|
||||
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"
	"strconv"
	"time"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	storageinformers "k8s.io/client-go/informers/storage/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics"
	"k8s.io/kubernetes/pkg/util/goroutinemap"
	vol "k8s.io/kubernetes/pkg/volume"

	"k8s.io/klog"
)

// This file contains the controller base functionality, i.e. framework to
// process PV/PVC added/updated/deleted events. The real binding, provisioning,
// recycling and deleting is done in pv_controller.go

// ControllerParameters contains arguments for creation of a new
// PersistentVolume controller.
type ControllerParameters struct {
	KubeClient                clientset.Interface
	SyncPeriod                time.Duration
	VolumePlugins             []vol.VolumePlugin
	Cloud                     cloudprovider.Interface
	ClusterName               string
	VolumeInformer            coreinformers.PersistentVolumeInformer
	ClaimInformer             coreinformers.PersistentVolumeClaimInformer
	ClassInformer             storageinformers.StorageClassInformer
	PodInformer               coreinformers.PodInformer
	NodeInformer              coreinformers.NodeInformer
	EventRecorder             record.EventRecorder
	EnableDynamicProvisioning bool
}

// NewController creates a new PersistentVolume controller
func NewController(p ControllerParameters) (*PersistentVolumeController, error) {
	eventRecorder := p.EventRecorder
	if eventRecorder == nil {
		broadcaster := record.NewBroadcaster()
		broadcaster.StartLogging(klog.Infof)
		broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: p.KubeClient.CoreV1().Events("")})
		eventRecorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "persistentvolume-controller"})
	}

	controller := &PersistentVolumeController{
		volumes:                       newPersistentVolumeOrderedIndex(),
		claims:                        cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
		kubeClient:                    p.KubeClient,
		eventRecorder:                 eventRecorder,
		runningOperations:             goroutinemap.NewGoRoutineMap(true /* exponentialBackOffOnError */),
		cloud:                         p.Cloud,
		enableDynamicProvisioning:     p.EnableDynamicProvisioning,
		clusterName:                   p.ClusterName,
		createProvisionedPVRetryCount: createProvisionedPVRetryCount,
		createProvisionedPVInterval:   createProvisionedPVInterval,
		claimQueue:                    workqueue.NewNamed("claims"),
		volumeQueue:                   workqueue.NewNamed("volumes"),
		resyncPeriod:                  p.SyncPeriod,
	}

	// Prober is nil because PV is not aware of Flexvolume.
	if err := controller.volumePluginMgr.InitPlugins(p.VolumePlugins, nil /* prober */, controller); err != nil {
		return nil, fmt.Errorf("Could not initialize volume plugins for PersistentVolume Controller: %v", err)
	}

	p.VolumeInformer.Informer().AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc:    func(obj interface{}) { controller.enqueueWork(controller.volumeQueue, obj) },
			UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueWork(controller.volumeQueue, newObj) },
			DeleteFunc: func(obj interface{}) { controller.enqueueWork(controller.volumeQueue, obj) },
		},
	)
	controller.volumeLister = p.VolumeInformer.Lister()
	controller.volumeListerSynced = p.VolumeInformer.Informer().HasSynced

	p.ClaimInformer.Informer().AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc:    func(obj interface{}) { controller.enqueueWork(controller.claimQueue, obj) },
			UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueWork(controller.claimQueue, newObj) },
			DeleteFunc: func(obj interface{}) { controller.enqueueWork(controller.claimQueue, obj) },
		},
	)
	controller.claimLister = p.ClaimInformer.Lister()
	controller.claimListerSynced = p.ClaimInformer.Informer().HasSynced

	controller.classLister = p.ClassInformer.Lister()
	controller.classListerSynced = p.ClassInformer.Informer().HasSynced
	controller.podLister = p.PodInformer.Lister()
	controller.podListerSynced = p.PodInformer.Informer().HasSynced
	controller.NodeLister = p.NodeInformer.Lister()
	controller.NodeListerSynced = p.NodeInformer.Informer().HasSynced
	return controller, nil
}
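
// Illustrative wiring (not part of the vendored file): constructing the
// controller from a shared informer factory. The "client" and "factory"
// variables and the 15-second sync period are assumptions for this sketch.
//
//	factory := informers.NewSharedInformerFactory(client, 0)
//	ctrl, err := NewController(ControllerParameters{
//		KubeClient:                client,
//		SyncPeriod:                15 * time.Second,
//		VolumeInformer:            factory.Core().V1().PersistentVolumes(),
//		ClaimInformer:             factory.Core().V1().PersistentVolumeClaims(),
//		ClassInformer:             factory.Storage().V1().StorageClasses(),
//		PodInformer:               factory.Core().V1().Pods(),
//		NodeInformer:              factory.Core().V1().Nodes(),
//		EnableDynamicProvisioning: true,
//	})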
// initializeCaches fills all controller caches with initial data from etcd in
// order to have the caches already filled when the first addClaim/addVolume
// calls arrive and perform the initial synchronization of the controller.
func (ctrl *PersistentVolumeController) initializeCaches(volumeLister corelisters.PersistentVolumeLister, claimLister corelisters.PersistentVolumeClaimLister) {
	volumeList, err := volumeLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("PersistentVolumeController can't initialize caches: %v", err)
		return
	}
	for _, volume := range volumeList {
		volumeClone := volume.DeepCopy()
		if _, err = ctrl.storeVolumeUpdate(volumeClone); err != nil {
			klog.Errorf("error updating volume cache: %v", err)
		}
	}

	claimList, err := claimLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("PersistentVolumeController can't initialize caches: %v", err)
		return
	}
	for _, claim := range claimList {
		if _, err = ctrl.storeClaimUpdate(claim.DeepCopy()); err != nil {
			klog.Errorf("error updating claim cache: %v", err)
		}
	}
	klog.V(4).Infof("controller initialized")
}
// enqueueWork adds volume or claim to given work queue.
func (ctrl *PersistentVolumeController) enqueueWork(queue workqueue.Interface, obj interface{}) {
	// Beware of "xxx deleted" events
	if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil {
		obj = unknown.Obj
	}
	objName, err := controller.KeyFunc(obj)
	if err != nil {
		klog.Errorf("failed to get key from object: %v", err)
		return
	}
	klog.V(5).Infof("enqueued %q for sync", objName)
	queue.Add(objName)
}

func (ctrl *PersistentVolumeController) storeVolumeUpdate(volume interface{}) (bool, error) {
	return storeObjectUpdate(ctrl.volumes.store, volume, "volume")
}

func (ctrl *PersistentVolumeController) storeClaimUpdate(claim interface{}) (bool, error) {
	return storeObjectUpdate(ctrl.claims, claim, "claim")
}

// updateVolume runs in worker thread and handles "volume added",
// "volume updated" and "periodic sync" events.
func (ctrl *PersistentVolumeController) updateVolume(volume *v1.PersistentVolume) {
	// Store the new volume version in the cache and do not process it if this
	// is an old version.
	new, err := ctrl.storeVolumeUpdate(volume)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !new {
		return
	}

	err = ctrl.syncVolume(volume)
	if err != nil {
		if errors.IsConflict(err) {
			// Version conflict error happens quite often and the controller
			// recovers from it easily.
			klog.V(3).Infof("could not sync volume %q: %+v", volume.Name, err)
		} else {
			klog.Errorf("could not sync volume %q: %+v", volume.Name, err)
		}
	}
}

// deleteVolume runs in worker thread and handles "volume deleted" event.
func (ctrl *PersistentVolumeController) deleteVolume(volume *v1.PersistentVolume) {
	_ = ctrl.volumes.store.Delete(volume)
	klog.V(4).Infof("volume %q deleted", volume.Name)

	if volume.Spec.ClaimRef == nil {
		return
	}
	// sync the claim when its volume is deleted. Explicitly syncing the
	// claim here in response to volume deletion prevents the claim from
	// waiting until the next sync period for its Lost status.
	claimKey := claimrefToClaimKey(volume.Spec.ClaimRef)
	klog.V(5).Infof("deleteVolume[%s]: scheduling sync of claim %q", volume.Name, claimKey)
	ctrl.claimQueue.Add(claimKey)
}

// updateClaim runs in worker thread and handles "claim added",
// "claim updated" and "periodic sync" events.
func (ctrl *PersistentVolumeController) updateClaim(claim *v1.PersistentVolumeClaim) {
	// Store the new claim version in the cache and do not process it if this is
	// an old version.
	new, err := ctrl.storeClaimUpdate(claim)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !new {
		return
	}
	err = ctrl.syncClaim(claim)
	if err != nil {
		if errors.IsConflict(err) {
			// Version conflict error happens quite often and the controller
			// recovers from it easily.
			klog.V(3).Infof("could not sync claim %q: %+v", claimToClaimKey(claim), err)
		} else {
			klog.Errorf("could not sync claim %q: %+v", claimToClaimKey(claim), err)
		}
	}
}
// deleteClaim runs in worker thread and handles "claim deleted" event.
func (ctrl *PersistentVolumeController) deleteClaim(claim *v1.PersistentVolumeClaim) {
	_ = ctrl.claims.Delete(claim)
	klog.V(4).Infof("claim %q deleted", claimToClaimKey(claim))

	volumeName := claim.Spec.VolumeName
	if volumeName == "" {
		klog.V(5).Infof("deleteClaim[%q]: volume not bound", claimToClaimKey(claim))
		return
	}
	// sync the volume when its claim is deleted. Explicitly syncing the
	// volume here in response to claim deletion prevents the volume from
	// waiting until the next sync period for its Release.
	klog.V(5).Infof("deleteClaim[%q]: scheduling sync of volume %s", claimToClaimKey(claim), volumeName)
	ctrl.volumeQueue.Add(volumeName)
}

// Run starts all of this controller's control loops.
func (ctrl *PersistentVolumeController) Run(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer ctrl.claimQueue.ShutDown()
	defer ctrl.volumeQueue.ShutDown()

	klog.Infof("Starting persistent volume controller")
	defer klog.Infof("Shutting down persistent volume controller")

	if !controller.WaitForCacheSync("persistent volume", stopCh, ctrl.volumeListerSynced, ctrl.claimListerSynced, ctrl.classListerSynced, ctrl.podListerSynced, ctrl.NodeListerSynced) {
		return
	}

	ctrl.initializeCaches(ctrl.volumeLister, ctrl.claimLister)

	go wait.Until(ctrl.resync, ctrl.resyncPeriod, stopCh)
	go wait.Until(ctrl.volumeWorker, time.Second, stopCh)
	go wait.Until(ctrl.claimWorker, time.Second, stopCh)

	metrics.Register(ctrl.volumes.store, ctrl.claims)

	<-stopCh
}

// volumeWorker processes items from volumeQueue. It must be run only once;
// syncVolume is not assured to be reentrant.
func (ctrl *PersistentVolumeController) volumeWorker() {
	workFunc := func() bool {
		keyObj, quit := ctrl.volumeQueue.Get()
		if quit {
			return true
		}
		defer ctrl.volumeQueue.Done(keyObj)
		key := keyObj.(string)
		klog.V(5).Infof("volumeWorker[%s]", key)

		_, name, err := cache.SplitMetaNamespaceKey(key)
		if err != nil {
			klog.V(4).Infof("error getting name of volume %q to get volume from informer: %v", key, err)
			return false
		}
		volume, err := ctrl.volumeLister.Get(name)
		if err == nil {
			// The volume still exists in informer cache, the event must have
			// been add/update/sync
			ctrl.updateVolume(volume)
			return false
		}
		if !errors.IsNotFound(err) {
			klog.V(2).Infof("error getting volume %q from informer: %v", key, err)
			return false
		}

		// The volume is not in informer cache, the event must have been
		// "delete"
		volumeObj, found, err := ctrl.volumes.store.GetByKey(key)
		if err != nil {
			klog.V(2).Infof("error getting volume %q from cache: %v", key, err)
			return false
		}
		if !found {
			// The controller has already processed the delete event and
			// deleted the volume from its cache
			klog.V(2).Infof("deletion of volume %q was already processed", key)
			return false
		}
		volume, ok := volumeObj.(*v1.PersistentVolume)
		if !ok {
			klog.Errorf("expected volume, got %+v", volumeObj)
			return false
		}
		ctrl.deleteVolume(volume)
		return false
	}
	for {
		if quit := workFunc(); quit {
			klog.Infof("volume worker queue shutting down")
			return
		}
	}
}

// claimWorker processes items from claimQueue. It must be run only once;
// syncClaim is not reentrant.
func (ctrl *PersistentVolumeController) claimWorker() {
	workFunc := func() bool {
		keyObj, quit := ctrl.claimQueue.Get()
		if quit {
			return true
		}
		defer ctrl.claimQueue.Done(keyObj)
		key := keyObj.(string)
		klog.V(5).Infof("claimWorker[%s]", key)

		namespace, name, err := cache.SplitMetaNamespaceKey(key)
		if err != nil {
			klog.V(4).Infof("error getting namespace & name of claim %q to get claim from informer: %v", key, err)
			return false
		}
		claim, err := ctrl.claimLister.PersistentVolumeClaims(namespace).Get(name)
		if err == nil {
			// The claim still exists in informer cache, the event must have
			// been add/update/sync
			ctrl.updateClaim(claim)
			return false
		}
		if !errors.IsNotFound(err) {
			klog.V(2).Infof("error getting claim %q from informer: %v", key, err)
			return false
		}

		// The claim is not in informer cache, the event must have been "delete"
		claimObj, found, err := ctrl.claims.GetByKey(key)
		if err != nil {
			klog.V(2).Infof("error getting claim %q from cache: %v", key, err)
			return false
		}
		if !found {
			// The controller has already processed the delete event and
			// deleted the claim from its cache
			klog.V(2).Infof("deletion of claim %q was already processed", key)
			return false
		}
		claim, ok := claimObj.(*v1.PersistentVolumeClaim)
		if !ok {
			klog.Errorf("expected claim, got %+v", claimObj)
			return false
		}
		ctrl.deleteClaim(claim)
		return false
	}
	for {
		if quit := workFunc(); quit {
			klog.Infof("claim worker queue shutting down")
			return
		}
	}
}

// resync supplements the short resync period of the shared informers. We don't
// want all consumers of the PV/PVC shared informers to have a short resync
// period, therefore we do our own.
func (ctrl *PersistentVolumeController) resync() {
	klog.V(4).Infof("resyncing PV controller")

	pvcs, err := ctrl.claimLister.List(labels.NewSelector())
	if err != nil {
		klog.Warningf("cannot list claims: %s", err)
		return
	}
	for _, pvc := range pvcs {
		ctrl.enqueueWork(ctrl.claimQueue, pvc)
	}

	pvs, err := ctrl.volumeLister.List(labels.NewSelector())
	if err != nil {
		klog.Warningf("cannot list persistent volumes: %s", err)
		return
	}
	for _, pv := range pvs {
		ctrl.enqueueWork(ctrl.volumeQueue, pv)
	}
}

// setClaimProvisioner saves
// claim.Annotations[annStorageProvisioner] = class.Provisioner
func (ctrl *PersistentVolumeController) setClaimProvisioner(claim *v1.PersistentVolumeClaim, provisionerName string) (*v1.PersistentVolumeClaim, error) {
	if val, ok := claim.Annotations[annStorageProvisioner]; ok && val == provisionerName {
		// annotation is already set, nothing to do
		return claim, nil
	}

	// The claim from method args can be pointing to the watcher cache. We must
	// not modify it, therefore create a copy.
	claimClone := claim.DeepCopy()
	metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, annStorageProvisioner, provisionerName)
	newClaim, err := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(claimClone)
	if err != nil {
		return newClaim, err
	}
	_, err = ctrl.storeClaimUpdate(newClaim)
	if err != nil {
		return newClaim, err
	}
	return newClaim, nil
}

// Stateless functions

func getClaimStatusForLogging(claim *v1.PersistentVolumeClaim) string {
	bound := metav1.HasAnnotation(claim.ObjectMeta, annBindCompleted)
	boundByController := metav1.HasAnnotation(claim.ObjectMeta, annBoundByController)

	return fmt.Sprintf("phase: %s, bound to: %q, bindCompleted: %v, boundByController: %v", claim.Status.Phase, claim.Spec.VolumeName, bound, boundByController)
}

func getVolumeStatusForLogging(volume *v1.PersistentVolume) string {
	boundByController := metav1.HasAnnotation(volume.ObjectMeta, annBoundByController)
	claimName := ""
	if volume.Spec.ClaimRef != nil {
		claimName = fmt.Sprintf("%s/%s (uid: %s)", volume.Spec.ClaimRef.Namespace, volume.Spec.ClaimRef.Name, volume.Spec.ClaimRef.UID)
	}
	return fmt.Sprintf("phase: %s, bound to: %q, boundByController: %v", volume.Status.Phase, claimName, boundByController)
}

// storeObjectUpdate updates given cache with a new object version from Informer
// callback (i.e. with events from etcd) or with an object modified by the
// controller itself. Returns "true" if the cache was updated, "false" if the
// object is an old version and should be ignored.
func storeObjectUpdate(store cache.Store, obj interface{}, className string) (bool, error) {
	objName, err := controller.KeyFunc(obj)
	if err != nil {
		return false, fmt.Errorf("Couldn't get key for object %+v: %v", obj, err)
	}
	oldObj, found, err := store.Get(obj)
	if err != nil {
		return false, fmt.Errorf("Error finding %s %q in controller cache: %v", className, objName, err)
	}

	objAccessor, err := meta.Accessor(obj)
	if err != nil {
		return false, err
	}

	if !found {
		// This is a new object
		klog.V(4).Infof("storeObjectUpdate: adding %s %q, version %s", className, objName, objAccessor.GetResourceVersion())
		if err = store.Add(obj); err != nil {
			return false, fmt.Errorf("Error adding %s %q to controller cache: %v", className, objName, err)
		}
		return true, nil
	}

	oldObjAccessor, err := meta.Accessor(oldObj)
	if err != nil {
		return false, err
	}

	objResourceVersion, err := strconv.ParseInt(objAccessor.GetResourceVersion(), 10, 64)
	if err != nil {
		return false, fmt.Errorf("Error parsing ResourceVersion %q of %s %q: %s", objAccessor.GetResourceVersion(), className, objName, err)
	}
	oldObjResourceVersion, err := strconv.ParseInt(oldObjAccessor.GetResourceVersion(), 10, 64)
	if err != nil {
		return false, fmt.Errorf("Error parsing old ResourceVersion %q of %s %q: %s", oldObjAccessor.GetResourceVersion(), className, objName, err)
	}

	// Throw away only older versions, let the same version pass - we do want to
	// get periodic sync events.
	if oldObjResourceVersion > objResourceVersion {
		klog.V(4).Infof("storeObjectUpdate: ignoring %s %q version %s", className, objName, objAccessor.GetResourceVersion())
		return false, nil
	}

	klog.V(4).Infof("storeObjectUpdate updating %s %q with version %s", className, objName, objAccessor.GetResourceVersion())
	if err = store.Update(obj); err != nil {
		return false, fmt.Errorf("Error updating %s %q in controller cache: %v", className, objName, err)
	}
	return true, nil
}
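
// Behavior sketch (illustrative, not part of the vendored file):
// storeObjectUpdate only moves the cache forward by ResourceVersion. Equal
// versions pass so periodic syncs are still processed; strictly older versions
// are dropped. objV5/objV7 are hypothetical objects with ResourceVersion "5"
// and "7".
//
//	updated, _ := storeObjectUpdate(store, objV7, "volume") // true: newer, cached
//	updated, _ = storeObjectUpdate(store, objV7, "volume")  // true: same version passes
//	updated, _ = storeObjectUpdate(store, objV5, "volume")  // false: older, ignored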
444
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/scheduler_assume_cache.go
generated
vendored
Normal file
@ -0,0 +1,444 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"
	"strconv"
	"sync"

	"k8s.io/klog"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/meta"
	"k8s.io/client-go/tools/cache"
)
// AssumeCache is a cache on top of the informer that allows for updating
// objects outside of informer events and also restoring the informer
// cache's version of the object. Objects are assumed to be
// Kubernetes API objects that implement meta.Interface
type AssumeCache interface {
	// Assume updates the object in-memory only
	Assume(obj interface{}) error

	// Restore the informer cache's version of the object
	Restore(objName string)

	// Get the object by name
	Get(objName string) (interface{}, error)

	// Get the API object by name
	GetAPIObj(objName string) (interface{}, error)

	// List all the objects in the cache
	List(indexObj interface{}) []interface{}
}

type errWrongType struct {
	typeName string
	object   interface{}
}

func (e *errWrongType) Error() string {
	return fmt.Sprintf("could not convert object to type %v: %+v", e.typeName, e.object)
}

type errNotFound struct {
	typeName   string
	objectName string
}

func (e *errNotFound) Error() string {
	return fmt.Sprintf("could not find %v %q", e.typeName, e.objectName)
}

type errObjectName struct {
	detailedErr error
}

func (e *errObjectName) Error() string {
	return fmt.Sprintf("failed to get object name: %v", e.detailedErr)
}

// assumeCache stores two pointers to represent a single object:
// * The pointer to the informer object.
// * The pointer to the latest object, which could be the same as
//   the informer object, or an in-memory object.
//
// An informer update always overrides the latest object pointer.
//
// Assume() only updates the latest object pointer.
// Restore() sets the latest object pointer back to the informer object.
// Get/List() always returns the latest object pointer.
type assumeCache struct {
	// Synchronizes updates to store
	rwMutex sync.RWMutex

	// describes the object stored
	description string

	// Stores objInfo pointers
	store cache.Indexer

	// Index function for object
	indexFunc cache.IndexFunc
	indexName string
}

type objInfo struct {
	// name of the object
	name string

	// Latest version of object could be cached-only or from informer
	latestObj interface{}

	// Latest object from informer
	apiObj interface{}
}

func objInfoKeyFunc(obj interface{}) (string, error) {
	objInfo, ok := obj.(*objInfo)
	if !ok {
		return "", &errWrongType{"objInfo", obj}
	}
	return objInfo.name, nil
}

func (c *assumeCache) objInfoIndexFunc(obj interface{}) ([]string, error) {
	objInfo, ok := obj.(*objInfo)
	if !ok {
		return []string{""}, &errWrongType{"objInfo", obj}
	}
	return c.indexFunc(objInfo.latestObj)
}

func NewAssumeCache(informer cache.SharedIndexInformer, description, indexName string, indexFunc cache.IndexFunc) *assumeCache {
	c := &assumeCache{
		description: description,
		indexFunc:   indexFunc,
		indexName:   indexName,
	}
	c.store = cache.NewIndexer(objInfoKeyFunc, cache.Indexers{indexName: c.objInfoIndexFunc})

	// Unit tests don't use informers
	if informer != nil {
		informer.AddEventHandler(
			cache.ResourceEventHandlerFuncs{
				AddFunc:    c.add,
				UpdateFunc: c.update,
				DeleteFunc: c.delete,
			},
		)
	}
	return c
}
func (c *assumeCache) add(obj interface{}) {
	if obj == nil {
		return
	}

	name, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		klog.Errorf("add failed: %v", &errObjectName{err})
		return
	}

	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	if objInfo, _ := c.getObjInfo(name); objInfo != nil {
		newVersion, err := c.getObjVersion(name, obj)
		if err != nil {
			klog.Errorf("add: couldn't get object version: %v", err)
			return
		}

		storedVersion, err := c.getObjVersion(name, objInfo.latestObj)
		if err != nil {
			klog.Errorf("add: couldn't get stored object version: %v", err)
			return
		}

		// Only update object if version is newer.
		// This is so we don't override assumed objects due to informer resync.
		if newVersion <= storedVersion {
			klog.V(10).Infof("Skip adding %v %v to assume cache because version %v is not newer than %v", c.description, name, newVersion, storedVersion)
			return
		}
	}

	objInfo := &objInfo{name: name, latestObj: obj, apiObj: obj}
	c.store.Update(objInfo)
	klog.V(10).Infof("Adding %v %v to assume cache: %+v ", c.description, name, obj)
}

func (c *assumeCache) update(oldObj interface{}, newObj interface{}) {
	c.add(newObj)
}

func (c *assumeCache) delete(obj interface{}) {
	if obj == nil {
		return
	}

	name, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		klog.Errorf("delete failed: %v", &errObjectName{err})
		return
	}

	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	objInfo := &objInfo{name: name}
	err = c.store.Delete(objInfo)
	if err != nil {
		klog.Errorf("delete: failed to delete %v %v: %v", c.description, name, err)
	}
}

func (c *assumeCache) getObjVersion(name string, obj interface{}) (int64, error) {
	objAccessor, err := meta.Accessor(obj)
	if err != nil {
		return -1, err
	}

	objResourceVersion, err := strconv.ParseInt(objAccessor.GetResourceVersion(), 10, 64)
	if err != nil {
		return -1, fmt.Errorf("error parsing ResourceVersion %q for %v %q: %s", objAccessor.GetResourceVersion(), c.description, name, err)
	}
	return objResourceVersion, nil
}

func (c *assumeCache) getObjInfo(name string) (*objInfo, error) {
	obj, ok, err := c.store.GetByKey(name)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, &errNotFound{c.description, name}
	}

	objInfo, ok := obj.(*objInfo)
	if !ok {
		return nil, &errWrongType{"objInfo", obj}
	}
	return objInfo, nil
}

func (c *assumeCache) Get(objName string) (interface{}, error) {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()

	objInfo, err := c.getObjInfo(objName)
	if err != nil {
		return nil, err
	}
	return objInfo.latestObj, nil
}

func (c *assumeCache) GetAPIObj(objName string) (interface{}, error) {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()

	objInfo, err := c.getObjInfo(objName)
	if err != nil {
		return nil, err
	}
	return objInfo.apiObj, nil
}

func (c *assumeCache) List(indexObj interface{}) []interface{} {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()

	allObjs := []interface{}{}
	objs, err := c.store.Index(c.indexName, &objInfo{latestObj: indexObj})
	if err != nil {
		klog.Errorf("list index error: %v", err)
		return nil
	}

	for _, obj := range objs {
		objInfo, ok := obj.(*objInfo)
		if !ok {
			klog.Errorf("list error: %v", &errWrongType{"objInfo", obj})
			continue
		}
		allObjs = append(allObjs, objInfo.latestObj)
	}
	return allObjs
}

func (c *assumeCache) Assume(obj interface{}) error {
	name, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		return &errObjectName{err}
	}

	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	objInfo, err := c.getObjInfo(name)
	if err != nil {
		return err
	}

	newVersion, err := c.getObjVersion(name, obj)
	if err != nil {
		return err
	}

	storedVersion, err := c.getObjVersion(name, objInfo.latestObj)
	if err != nil {
		return err
	}

	if newVersion < storedVersion {
		return fmt.Errorf("%v %q is out of sync (stored: %d, assume: %d)", c.description, name, storedVersion, newVersion)
	}

	// Only update the cached object
	objInfo.latestObj = obj
	klog.V(4).Infof("Assumed %v %q, version %v", c.description, name, newVersion)
	return nil
}

func (c *assumeCache) Restore(objName string) {
	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	objInfo, err := c.getObjInfo(objName)
	if err != nil {
		// This could be expected if object got deleted
		klog.V(5).Infof("Restore %v %q warning: %v", c.description, objName, err)
	} else {
		objInfo.latestObj = objInfo.apiObj
		klog.V(4).Infof("Restored %v %q", c.description, objName)
	}
}
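
// Usage sketch (illustrative, not part of the vendored file): assume a
// modified copy, then roll back to the informer's version if the
// corresponding API update fails. "pvCache", "client", "pv", and "claimRef"
// are hypothetical.
//
//	pvClone := pv.DeepCopy()
//	pvClone.Spec.ClaimRef = claimRef
//	if err := pvCache.Assume(pvClone); err == nil {
//		if _, err := client.CoreV1().PersistentVolumes().Update(pvClone); err != nil {
//			pvCache.Restore(pvClone.Name) // back to the informer's copy
//		}
//	}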
// PVAssumeCache is an AssumeCache for PersistentVolume objects
type PVAssumeCache interface {
	AssumeCache

	GetPV(pvName string) (*v1.PersistentVolume, error)
	GetAPIPV(pvName string) (*v1.PersistentVolume, error)
	ListPVs(storageClassName string) []*v1.PersistentVolume
}

type pvAssumeCache struct {
	*assumeCache
}

func pvStorageClassIndexFunc(obj interface{}) ([]string, error) {
	if pv, ok := obj.(*v1.PersistentVolume); ok {
		return []string{pv.Spec.StorageClassName}, nil
	}
	return []string{""}, fmt.Errorf("object is not a v1.PersistentVolume: %v", obj)
}

func NewPVAssumeCache(informer cache.SharedIndexInformer) PVAssumeCache {
	return &pvAssumeCache{assumeCache: NewAssumeCache(informer, "v1.PersistentVolume", "storageclass", pvStorageClassIndexFunc)}
}

func (c *pvAssumeCache) GetPV(pvName string) (*v1.PersistentVolume, error) {
	obj, err := c.Get(pvName)
	if err != nil {
		return nil, err
	}

	pv, ok := obj.(*v1.PersistentVolume)
	if !ok {
		return nil, &errWrongType{"v1.PersistentVolume", obj}
	}
	return pv, nil
}

func (c *pvAssumeCache) GetAPIPV(pvName string) (*v1.PersistentVolume, error) {
	obj, err := c.GetAPIObj(pvName)
	if err != nil {
		return nil, err
	}
	pv, ok := obj.(*v1.PersistentVolume)
	if !ok {
		return nil, &errWrongType{"v1.PersistentVolume", obj}
	}
	return pv, nil
}

func (c *pvAssumeCache) ListPVs(storageClassName string) []*v1.PersistentVolume {
	objs := c.List(&v1.PersistentVolume{
		Spec: v1.PersistentVolumeSpec{
			StorageClassName: storageClassName,
		},
	})
	pvs := []*v1.PersistentVolume{}
	for _, obj := range objs {
		pv, ok := obj.(*v1.PersistentVolume)
		if !ok {
			klog.Errorf("ListPVs: %v", &errWrongType{"v1.PersistentVolume", obj})
			// Skip the malformed entry instead of appending a nil PV.
			continue
		}
		pvs = append(pvs, pv)
	}
	return pvs
}

// PVCAssumeCache is an AssumeCache for PersistentVolumeClaim objects
type PVCAssumeCache interface {
	AssumeCache

	// GetPVC returns the PVC from the cache with given pvcKey.
	// pvcKey is the result of MetaNamespaceKeyFunc on PVC obj
	GetPVC(pvcKey string) (*v1.PersistentVolumeClaim, error)
	GetAPIPVC(pvcKey string) (*v1.PersistentVolumeClaim, error)
}

type pvcAssumeCache struct {
	*assumeCache
}

func NewPVCAssumeCache(informer cache.SharedIndexInformer) PVCAssumeCache {
	return &pvcAssumeCache{assumeCache: NewAssumeCache(informer, "v1.PersistentVolumeClaim", "namespace", cache.MetaNamespaceIndexFunc)}
}

func (c *pvcAssumeCache) GetPVC(pvcKey string) (*v1.PersistentVolumeClaim, error) {
	obj, err := c.Get(pvcKey)
	if err != nil {
		return nil, err
	}

	pvc, ok := obj.(*v1.PersistentVolumeClaim)
	if !ok {
		return nil, &errWrongType{"v1.PersistentVolumeClaim", obj}
	}
	return pvc, nil
}

func (c *pvcAssumeCache) GetAPIPVC(pvcKey string) (*v1.PersistentVolumeClaim, error) {
	obj, err := c.GetAPIObj(pvcKey)
	if err != nil {
		return nil, err
	}
	pvc, ok := obj.(*v1.PersistentVolumeClaim)
	if !ok {
		return nil, &errWrongType{"v1.PersistentVolumeClaim", obj}
	}
	return pvc, nil
}
60
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/scheduler_bind_cache_metrics.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"github.com/prometheus/client_golang/prometheus"
)

// VolumeSchedulerSubsystem - subsystem name used by scheduler
const VolumeSchedulerSubsystem = "scheduler_volume"

var (
	VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "binder_cache_requests_total",
			Help:      "Total number for request volume binding cache",
		},
		[]string{"operation"},
	)
	VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_duration_seconds",
			Help:      "Volume scheduling stage latency",
			Buckets:   prometheus.ExponentialBuckets(1000, 2, 15),
		},
		[]string{"operation"},
	)
	VolumeSchedulingStageFailed = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_stage_error_total",
			Help:      "Volume scheduling stage error count",
		},
		[]string{"operation"},
	)
)

// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
// used by the scheduler process.
func RegisterVolumeSchedulingMetrics() {
	prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache)
	prometheus.MustRegister(VolumeSchedulingStageLatency)
	prometheus.MustRegister(VolumeSchedulingStageFailed)
}
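
// Illustrative use (not part of the vendored file): register once at scheduler
// startup, then record stage outcomes; the "assume" label is an example value.
//
//	RegisterVolumeSchedulingMetrics()
//	VolumeBindingRequestSchedulerBinderCache.WithLabelValues("assume").Inc()
//	VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()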
792
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/scheduler_binder.go
generated
vendored
Normal file
@ -0,0 +1,792 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"
	"sort"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apiserver/pkg/storage/etcd"
	coreinformers "k8s.io/client-go/informers/core/v1"
	storageinformers "k8s.io/client-go/informers/storage/v1"
	clientset "k8s.io/client-go/kubernetes"
	storagelisters "k8s.io/client-go/listers/storage/v1"
	"k8s.io/klog"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// SchedulerVolumeBinder is used by the scheduler to handle PVC/PV binding
// and dynamic provisioning. The binding decisions are integrated into the pod scheduling
// workflow so that the PV NodeAffinity is also considered along with the pod's other
// scheduling requirements.
//
// This integrates into the existing default scheduler workflow as follows:
// 1. The scheduler takes a Pod off the scheduler queue and processes it serially:
//    a. Invokes all predicate functions, parallelized across nodes. FindPodVolumes() is invoked here.
//    b. Invokes all priority functions. Future/TBD
//    c. Selects the best node for the Pod.
//    d. Caches the node selection for the Pod. AssumePodVolumes() is invoked here.
//       i.  If PVC binding is required, cache in-memory only:
//           * For manual binding: update PV objects for prebinding to the corresponding PVCs.
//           * For dynamic provisioning: update PVC object with a selected node from c)
//           * For the pod, which PVCs and PVs need API updates.
//       ii. Afterwards, the main scheduler caches the Pod->Node binding in the scheduler's pod cache.
//           This is handled in the scheduler and not here.
//    e. Asynchronously bind volumes and pod in a separate goroutine
//       i.  BindPodVolumes() is called first. It makes all the necessary API updates and waits for
//           PV controller to fully bind and provision the PVCs. If binding fails, the Pod is sent
//           back through the scheduler.
//       ii. After BindPodVolumes() is complete, then the scheduler does the final Pod->Node binding.
// 2. Once all the assume operations are done in d), the scheduler processes the next Pod in the scheduler queue
//    while the actual binding operation occurs in the background.
type SchedulerVolumeBinder interface {
	// FindPodVolumes checks if all of a Pod's PVCs can be satisfied by the node.
	//
	// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
	// Otherwise, it tries to find an available PV to bind to the PVC.
	//
	// It returns true if all of the Pod's PVCs have matching PVs or can be dynamically provisioned,
	// and returns true if bound volumes satisfy the PV NodeAffinity.
	//
	// This function is called by the volume binding scheduler predicate and can be called in parallel.
	FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatisfied bool, err error)

	// AssumePodVolumes will:
	// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
	// that the PV is prebound to the PVC.
	// 2. Take the PVCs that need provisioning and update the PVC cache with related
	// annotations set.
	//
	// It returns true if all volumes are fully bound.
	//
	// This function will modify assumedPod with the node name.
	// This function is called serially.
	AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (allFullyBound bool, err error)

	// BindPodVolumes will:
	// 1. Initiate the volume binding by making the API call to prebind the PV
	// to its matching PVC.
	// 2. Trigger the volume provisioning by making the API call to set related
	// annotations on the PVC
	// 3. Wait for PVCs to be completely bound by the PV controller
	//
	// This function can be called in parallel.
	BindPodVolumes(assumedPod *v1.Pod) error

	// GetBindingsCache returns the cache used (if any) to store volume binding decisions.
	GetBindingsCache() PodBindingCache
}
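
// Call-sequence sketch (illustrative, not part of the vendored file) matching
// the workflow described above; error handling is elided and "binder", "pod",
// and "node" are assumptions.
//
//	unboundOK, boundOK, _ := binder.FindPodVolumes(pod, node) // predicate, per candidate node
//	if unboundOK && boundOK {
//		allBound, _ := binder.AssumePodVolumes(pod, node.Name) // serial, in-memory only
//		if !allBound {
//			go func() { _ = binder.BindPodVolumes(pod) }() // async API updates + wait
//		}
//	}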
type volumeBinder struct {
	kubeClient  clientset.Interface
	classLister storagelisters.StorageClassLister

	nodeInformer coreinformers.NodeInformer
	pvcCache     PVCAssumeCache
	pvCache      PVAssumeCache

	// Stores binding decisions that were made in FindPodVolumes for use in AssumePodVolumes.
	// AssumePodVolumes modifies the bindings again for use in BindPodVolumes.
	podBindingCache PodBindingCache

	// Amount of time to wait for the bind operation to succeed
	bindTimeout time.Duration
}

// NewVolumeBinder sets up all the caches needed for the scheduler to make volume binding decisions.
func NewVolumeBinder(
	kubeClient clientset.Interface,
	nodeInformer coreinformers.NodeInformer,
	pvcInformer coreinformers.PersistentVolumeClaimInformer,
	pvInformer coreinformers.PersistentVolumeInformer,
	storageClassInformer storageinformers.StorageClassInformer,
	bindTimeout time.Duration) SchedulerVolumeBinder {

	b := &volumeBinder{
		kubeClient:      kubeClient,
		classLister:     storageClassInformer.Lister(),
		nodeInformer:    nodeInformer,
		pvcCache:        NewPVCAssumeCache(pvcInformer.Informer()),
		pvCache:         NewPVAssumeCache(pvInformer.Informer()),
		podBindingCache: NewPodBindingCache(),
		bindTimeout:     bindTimeout,
	}

	return b
}

func (b *volumeBinder) GetBindingsCache() PodBindingCache {
	return b.podBindingCache
}
func podHasClaims(pod *v1.Pod) bool {
	for _, vol := range pod.Spec.Volumes {
		if vol.PersistentVolumeClaim != nil {
			return true
		}
	}
	return false
}

// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
// That's necessary because some operations need to pass fake node objects to the predicate.
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatisfied bool, err error) {
	podName := getPodName(pod)

	// Warning: Below log needs high verbosity as it can be printed several times (#60933).
	klog.V(5).Infof("FindPodVolumes for pod %q, node %q", podName, node.Name)

	// Initialize to true for pods that don't have volumes
	unboundVolumesSatisfied = true
	boundVolumesSatisfied = true
	start := time.Now()
	defer func() {
		VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
		if err != nil {
			VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
		}
	}()

	if !podHasClaims(pod) {
		// Fast path
		return unboundVolumesSatisfied, boundVolumesSatisfied, nil
	}

	var (
		matchedClaims     []*bindingInfo
		provisionedClaims []*v1.PersistentVolumeClaim
	)
	defer func() {
		// We recreate bindings for each new schedule loop.
		if len(matchedClaims) == 0 && len(provisionedClaims) == 0 {
			// Clear cache if no claims to bind or provision for this node.
			b.podBindingCache.ClearBindings(pod, node.Name)
			return
		}
		// Although we do not distinguish nil from empty in this function, for
		// easier testing, we normalize empty to nil.
		if len(matchedClaims) == 0 {
			matchedClaims = nil
		}
		if len(provisionedClaims) == 0 {
			provisionedClaims = nil
		}
		// Mark cache with all matched and provisioned claims for this node
		b.podBindingCache.UpdateBindings(pod, node.Name, matchedClaims, provisionedClaims)
	}()

	// The pod's volumes need to be processed in one call to avoid the race condition where
	// volumes can get bound/provisioned in between calls.
	boundClaims, claimsToBind, unboundClaimsImmediate, err := b.getPodVolumes(pod)
	if err != nil {
		return false, false, err
	}

	// Immediate claims should be bound
	if len(unboundClaimsImmediate) > 0 {
		return false, false, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
	}

	// Check PV node affinity on bound volumes
	if len(boundClaims) > 0 {
		boundVolumesSatisfied, err = b.checkBoundClaims(boundClaims, node, podName)
		if err != nil {
			return false, false, err
		}
	}

	// Find matching volumes and node for unbound claims
	if len(claimsToBind) > 0 {
		var (
			claimsToFindMatching []*v1.PersistentVolumeClaim
			claimsToProvision    []*v1.PersistentVolumeClaim
		)

		// Filter out claims to provision
		for _, claim := range claimsToBind {
			if selectedNode, ok := claim.Annotations[annSelectedNode]; ok {
				if selectedNode != node.Name {
					// Fast path, skip unmatched node
					return false, boundVolumesSatisfied, nil
				}
				claimsToProvision = append(claimsToProvision, claim)
			} else {
				claimsToFindMatching = append(claimsToFindMatching, claim)
			}
		}

		// Find matching volumes
		if len(claimsToFindMatching) > 0 {
			var unboundClaims []*v1.PersistentVolumeClaim
			unboundVolumesSatisfied, matchedClaims, unboundClaims, err = b.findMatchingVolumes(pod, claimsToFindMatching, node)
			if err != nil {
				return false, false, err
			}
			claimsToProvision = append(claimsToProvision, unboundClaims...)
		}

		// Check for claims to provision
		if len(claimsToProvision) > 0 {
			unboundVolumesSatisfied, provisionedClaims, err = b.checkVolumeProvisions(pod, claimsToProvision, node)
			if err != nil {
				return false, false, err
			}
		}
	}

	return unboundVolumesSatisfied, boundVolumesSatisfied, nil
}
// AssumePodVolumes will take the cached matching PVs and PVCs to provision
// in podBindingCache for the chosen node, and:
// 1. Update the pvCache with the new prebound PV.
// 2. Update the pvcCache with the new PVCs with annotations set
// 3. Update podBindingCache again with cached API updates for PVs and PVCs.
func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (allFullyBound bool, err error) {
	podName := getPodName(assumedPod)

	klog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName)
	start := time.Now()
	defer func() {
		VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
		if err != nil {
			VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
		}
	}()

	if allBound := b.arePodVolumesBound(assumedPod); allBound {
		klog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName)
		return true, nil
	}

	assumedPod.Spec.NodeName = nodeName

	claimsToBind := b.podBindingCache.GetBindings(assumedPod, nodeName)
	claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, nodeName)

	// Assume PV
	newBindings := []*bindingInfo{}
	for _, binding := range claimsToBind {
		newPV, dirty, err := GetBindVolumeToClaim(binding.pv, binding.pvc)
		klog.V(5).Infof("AssumePodVolumes: GetBindVolumeToClaim for pod %q, PV %q, PVC %q. newPV %p, dirty %v, err: %v",
			podName,
			binding.pv.Name,
			binding.pvc.Name,
			newPV,
			dirty,
			err)
		if err != nil {
			b.revertAssumedPVs(newBindings)
			return false, err
		}
		// TODO: can we assume every time?
		if dirty {
			err = b.pvCache.Assume(newPV)
			if err != nil {
				b.revertAssumedPVs(newBindings)
				return false, err
			}
		}
		newBindings = append(newBindings, &bindingInfo{pv: newPV, pvc: binding.pvc})
	}

	// Assume PVCs
	newProvisionedPVCs := []*v1.PersistentVolumeClaim{}
	for _, claim := range claimsToProvision {
		// The claims from method args can be pointing to watcher cache. We must not
		// modify these, therefore create a copy.
		claimClone := claim.DeepCopy()
		metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, annSelectedNode, nodeName)
		err = b.pvcCache.Assume(claimClone)
		if err != nil {
			b.revertAssumedPVs(newBindings)
			b.revertAssumedPVCs(newProvisionedPVCs)
			return
		}

		newProvisionedPVCs = append(newProvisionedPVCs, claimClone)
	}

	// Update cache with the assumed pvcs and pvs
	// Even if length is zero, update the cache with an empty slice to indicate that no
	// operations are needed
	b.podBindingCache.UpdateBindings(assumedPod, nodeName, newBindings, newProvisionedPVCs)

	return
}
// BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache,
// makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound
// by the PV controller.
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
	podName := getPodName(assumedPod)
	klog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName)

	start := time.Now()
	defer func() {
		VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
		if err != nil {
			VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
		}
	}()

	bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName)
	claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName)

	// Start API operations
	err = b.bindAPIUpdate(podName, bindings, claimsToProvision)
	if err != nil {
		return err
	}

	return wait.Poll(time.Second, b.bindTimeout, func() (bool, error) {
		b, err := b.checkBindings(assumedPod, bindings, claimsToProvision)
		return b, err
	})
}

func getPodName(pod *v1.Pod) string {
	return pod.Namespace + "/" + pod.Name
}

func getPVCName(pvc *v1.PersistentVolumeClaim) string {
	return pvc.Namespace + "/" + pvc.Name
}
// bindAPIUpdate gets the cached bindings and PVCs to provision in podBindingCache
|
||||
// and makes the API update for those PVs/PVCs.
|
||||
func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, claimsToProvision []*v1.PersistentVolumeClaim) error {
|
||||
if bindings == nil {
|
||||
return fmt.Errorf("failed to get cached bindings for pod %q", podName)
|
||||
}
|
||||
if claimsToProvision == nil {
|
||||
return fmt.Errorf("failed to get cached claims to provision for pod %q", podName)
|
||||
}
|
||||
|
||||
lastProcessedBinding := 0
|
||||
lastProcessedProvisioning := 0
|
||||
defer func() {
|
||||
// only revert assumed cached updates for volumes we haven't successfully bound
|
||||
if lastProcessedBinding < len(bindings) {
|
||||
b.revertAssumedPVs(bindings[lastProcessedBinding:])
|
||||
}
|
||||
// only revert assumed cached updates for claims we haven't updated,
|
||||
if lastProcessedProvisioning < len(claimsToProvision) {
|
||||
b.revertAssumedPVCs(claimsToProvision[lastProcessedProvisioning:])
|
||||
}
|
||||
}()
|
||||
|
||||
var (
|
||||
binding *bindingInfo
|
||||
i int
|
||||
claim *v1.PersistentVolumeClaim
|
||||
)
|
||||
|
||||
// Do the actual prebinding. Let the PV controller take care of the rest
|
||||
// There is no API rollback if the actual binding fails
|
||||
for _, binding = range bindings {
|
||||
klog.V(5).Infof("bindAPIUpdate: Pod %q, binding PV %q to PVC %q", podName, binding.pv.Name, binding.pvc.Name)
|
||||
// TODO: does it hurt if we make an api call and nothing needs to be updated?
|
||||
claimKey := claimToClaimKey(binding.pvc)
|
||||
klog.V(2).Infof("claim %q bound to volume %q", claimKey, binding.pv.Name)
|
||||
if newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(binding.pv); err != nil {
|
||||
klog.V(4).Infof("updating PersistentVolume[%s]: binding to %q failed: %v", binding.pv.Name, claimKey, err)
|
||||
return err
|
||||
} else {
|
||||
klog.V(4).Infof("updating PersistentVolume[%s]: bound to %q", binding.pv.Name, claimKey)
|
||||
// Save updated object from apiserver for later checking.
|
||||
binding.pv = newPV
|
||||
}
|
||||
lastProcessedBinding++
|
||||
}
|
||||
|
||||
// Update claims objects to trigger volume provisioning. Let the PV controller take care of the rest
|
||||
// PV controller is expect to signal back by removing related annotations if actual provisioning fails
|
||||
for i, claim = range claimsToProvision {
|
||||
klog.V(5).Infof("bindAPIUpdate: Pod %q, PVC %q", podName, getPVCName(claim))
|
||||
if newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(claim); err != nil {
|
||||
return err
|
||||
} else {
|
||||
// Save updated object from apiserver for later checking.
|
||||
claimsToProvision[i] = newClaim
|
||||
}
|
||||
lastProcessedProvisioning++
|
||||
}
|
||||
|
||||
return nil
|
||||
}
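// A worked example of the revert bookkeeping above (values are illustrative):
// with three bindings and an Update failure on the second, lastProcessedBinding
// is 1 when the deferred function runs, so only bindings[1:] are reverted in
// the assume cache; bindings[0] was already persisted to the API server and
// intentionally stays bound.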

var (
	versioner = etcd.APIObjectVersioner{}
)

// checkBindings runs through all the PVCs in the Pod and checks:
// * if the PVC is fully bound
// * if there are any conditions that require binding to fail and be retried
//
// It returns true when all of the Pod's PVCs are fully bound, and an error if
// binding (and scheduling) needs to be retried.
// Note that it checks API objects, not the PV/PVC cache: the cache can be
// assumed again by the main scheduler loop, so we must check the latest state
// in the API server, which is shared with the PV controller and provisioners.
func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claimsToProvision []*v1.PersistentVolumeClaim) (bool, error) {
	podName := getPodName(pod)
	if bindings == nil {
		return false, fmt.Errorf("failed to get cached bindings for pod %q", podName)
	}
	if claimsToProvision == nil {
		return false, fmt.Errorf("failed to get cached claims to provision for pod %q", podName)
	}

	node, err := b.nodeInformer.Lister().Get(pod.Spec.NodeName)
	if err != nil {
		return false, fmt.Errorf("failed to get node %q: %v", pod.Spec.NodeName, err)
	}

	// Check for any conditions that might require scheduling retry

	// When a pod is removed from the scheduling queue because of deletion or
	// any other reason, the binding operation should be cancelled. There is no
	// need to check PV/PVC bindings any more.
	// We check the pod binding cache here, which is cleared when the pod is
	// removed from the scheduling queue.
	if b.podBindingCache.GetDecisions(pod) == nil {
		return false, fmt.Errorf("pod %q does not exist any more", podName)
	}

	for _, binding := range bindings {
		pv, err := b.pvCache.GetAPIPV(binding.pv.Name)
		if err != nil {
			return false, fmt.Errorf("failed to check binding: %v", err)
		}

		pvc, err := b.pvcCache.GetAPIPVC(getPVCName(binding.pvc))
		if err != nil {
			return false, fmt.Errorf("failed to check binding: %v", err)
		}

		// Because we updated PV in apiserver, skip if API object is older
		// and wait for new API object propagated from apiserver.
		if versioner.CompareResourceVersion(binding.pv, pv) > 0 {
			return false, nil
		}

		// Check PV's node affinity (the node might not have the proper label)
		if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
			return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
		}

		// Check if pv.ClaimRef got dropped by unbindVolume()
		if pv.Spec.ClaimRef == nil || pv.Spec.ClaimRef.UID == "" {
			return false, fmt.Errorf("ClaimRef got reset for pv %q", pv.Name)
		}

		// Check if pvc is fully bound
		if !b.isPVCFullyBound(pvc) {
			return false, nil
		}
	}

	for _, claim := range claimsToProvision {
		pvc, err := b.pvcCache.GetAPIPVC(getPVCName(claim))
		if err != nil {
			return false, fmt.Errorf("failed to check provisioning pvc: %v", err)
		}

		// Because we updated PVC in apiserver, skip if API object is older
		// and wait for new API object propagated from apiserver.
		if versioner.CompareResourceVersion(claim, pvc) > 0 {
			return false, nil
		}

		// Check if selectedNode annotation is still set
		if pvc.Annotations == nil {
			return false, fmt.Errorf("selectedNode annotation reset for PVC %q", pvc.Name)
		}
		selectedNode := pvc.Annotations[annSelectedNode]
		if selectedNode != pod.Spec.NodeName {
			return false, fmt.Errorf("selectedNode annotation value %q not set to scheduled node %q", selectedNode, pod.Spec.NodeName)
		}

		// If the PVC is bound to a PV, check its node affinity
		if pvc.Spec.VolumeName != "" {
			pv, err := b.pvCache.GetAPIPV(pvc.Spec.VolumeName)
			if err != nil {
				if _, ok := err.(*errNotFound); ok {
					// We tolerate NotFound error here, because PV is possibly
					// not found because of API delay, we can check next time.
					// And if PV does not exist because it's deleted, PVC will
					// be unbound eventually.
					return false, nil
				}
				return false, fmt.Errorf("failed to get pv %q from cache: %v", pvc.Spec.VolumeName, err)
			}
			if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
				return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
			}
		}

		// Check if pvc is fully bound
		if !b.isPVCFullyBound(pvc) {
			return false, nil
		}
	}

	// All pvs and pvcs that we operated on are bound
	klog.V(4).Infof("All PVCs for pod %q are bound", podName)
	return true, nil
}
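// Informal note on the resource-version checks above: CompareResourceVersion
// returns a positive value when its first argument is newer. If the PV saved
// during bindAPIUpdate is at resourceVersion "105" while the informer cache
// still serves "103" (versions made up for illustration), the check yields
// (false, nil) and the poll in BindPodVolumes retries on the next tick.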

func (b *volumeBinder) isVolumeBound(namespace string, vol *v1.Volume) (bool, *v1.PersistentVolumeClaim, error) {
	if vol.PersistentVolumeClaim == nil {
		return true, nil, nil
	}

	pvcName := vol.PersistentVolumeClaim.ClaimName
	return b.isPVCBound(namespace, pvcName)
}

func (b *volumeBinder) isPVCBound(namespace, pvcName string) (bool, *v1.PersistentVolumeClaim, error) {
	claim := &v1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      pvcName,
			Namespace: namespace,
		},
	}
	pvcKey := getPVCName(claim)
	pvc, err := b.pvcCache.GetPVC(pvcKey)
	if err != nil || pvc == nil {
		return false, nil, fmt.Errorf("error getting PVC %q: %v", pvcKey, err)
	}

	fullyBound := b.isPVCFullyBound(pvc)
	if fullyBound {
		klog.V(5).Infof("PVC %q is fully bound to PV %q", pvcKey, pvc.Spec.VolumeName)
	} else {
		if pvc.Spec.VolumeName != "" {
			klog.V(5).Infof("PVC %q is not fully bound to PV %q", pvcKey, pvc.Spec.VolumeName)
		} else {
			klog.V(5).Infof("PVC %q is not bound", pvcKey)
		}
	}
	return fullyBound, pvc, nil
}

func (b *volumeBinder) isPVCFullyBound(pvc *v1.PersistentVolumeClaim) bool {
	return pvc.Spec.VolumeName != "" && metav1.HasAnnotation(pvc.ObjectMeta, annBindCompleted)
}

// arePodVolumesBound returns true if all volumes are fully bound
func (b *volumeBinder) arePodVolumesBound(pod *v1.Pod) bool {
	for _, vol := range pod.Spec.Volumes {
		if isBound, _, _ := b.isVolumeBound(pod.Namespace, &vol); !isBound {
			// Pod has at least one PVC that needs binding
			return false
		}
	}
	return true
}

// getPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning),
// and unbound with immediate binding (including prebound).
func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaims []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
	boundClaims = []*v1.PersistentVolumeClaim{}
	unboundClaimsImmediate = []*v1.PersistentVolumeClaim{}
	unboundClaims = []*v1.PersistentVolumeClaim{}

	for _, vol := range pod.Spec.Volumes {
		volumeBound, pvc, err := b.isVolumeBound(pod.Namespace, &vol)
		if err != nil {
			return nil, nil, nil, err
		}
		if pvc == nil {
			continue
		}
		if volumeBound {
			boundClaims = append(boundClaims, pvc)
		} else {
			delayBindingMode, err := IsDelayBindingMode(pvc, b.classLister)
			if err != nil {
				return nil, nil, nil, err
			}
			// Prebound PVCs are treated as unbound immediate binding
			if delayBindingMode && pvc.Spec.VolumeName == "" {
				// Scheduler path
				unboundClaims = append(unboundClaims, pvc)
			} else {
				// !delayBindingMode || pvc.Spec.VolumeName != ""
				// Immediate binding should have already been bound
				unboundClaimsImmediate = append(unboundClaimsImmediate, pvc)
			}
		}
	}
	return boundClaims, unboundClaims, unboundClaimsImmediate, nil
}

func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node *v1.Node, podName string) (bool, error) {
	for _, pvc := range claims {
		pvName := pvc.Spec.VolumeName
		pv, err := b.pvCache.GetPV(pvName)
		if err != nil {
			return false, err
		}

		err = volumeutil.CheckNodeAffinity(pv, node.Labels)
		if err != nil {
			klog.V(4).Infof("PersistentVolume %q, Node %q mismatch for Pod %q: %v", pvName, node.Name, podName, err)
			return false, nil
		}
		klog.V(5).Infof("PersistentVolume %q, Node %q matches for Pod %q", pvName, node.Name, podName)
	}

	klog.V(4).Infof("All bound volumes for Pod %q match with Node %q", podName, node.Name)
	return true, nil
}

// findMatchingVolumes tries to find matching volumes for the given claims,
// and returns any still-unbound claims for further provisioning.
func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (foundMatches bool, matchedClaims []*bindingInfo, unboundClaims []*v1.PersistentVolumeClaim, err error) {
	podName := getPodName(pod)
	// Sort all the claims by increasing size request to get the smallest fits
	sort.Sort(byPVCSize(claimsToBind))

	chosenPVs := map[string]*v1.PersistentVolume{}

	foundMatches = true
	matchedClaims = []*bindingInfo{}

	for _, pvc := range claimsToBind {
		// Get storage class name from each PVC
		storageClassName := ""
		storageClass := pvc.Spec.StorageClassName
		if storageClass != nil {
			storageClassName = *storageClass
		}
		allPVs := b.pvCache.ListPVs(storageClassName)
		pvcName := getPVCName(pvc)

		// Find a matching PV
		pv, err := findMatchingVolume(pvc, allPVs, node, chosenPVs, true)
		if err != nil {
			return false, nil, nil, err
		}
		if pv == nil {
			klog.V(4).Infof("No matching volumes for Pod %q, PVC %q on node %q", podName, pvcName, node.Name)
			unboundClaims = append(unboundClaims, pvc)
			foundMatches = false
			continue
		}

		// matching PV needs to be excluded so we don't select it again
		chosenPVs[pv.Name] = pv
		matchedClaims = append(matchedClaims, &bindingInfo{pv: pv, pvc: pvc})
		klog.V(5).Infof("Found matching PV %q for PVC %q on node %q for pod %q", pv.Name, pvcName, node.Name, podName)
	}

	if foundMatches {
		klog.V(4).Infof("Found matching volumes for pod %q on node %q", podName, node.Name)
	}

	return
}

// checkVolumeProvisions checks the given unbound claims (which have gone through
// findMatchingVolumes and have no matching volumes for binding) and returns true
// if all of the claims are eligible for dynamic provisioning.
func (b *volumeBinder) checkVolumeProvisions(pod *v1.Pod, claimsToProvision []*v1.PersistentVolumeClaim, node *v1.Node) (provisionSatisfied bool, provisionedClaims []*v1.PersistentVolumeClaim, err error) {
	podName := getPodName(pod)
	provisionedClaims = []*v1.PersistentVolumeClaim{}

	for _, claim := range claimsToProvision {
		pvcName := getPVCName(claim)
		className := v1helper.GetPersistentVolumeClaimClass(claim)
		if className == "" {
			return false, nil, fmt.Errorf("no class for claim %q", pvcName)
		}

		class, err := b.classLister.Get(className)
		if err != nil {
			return false, nil, fmt.Errorf("failed to find storage class %q", className)
		}
		provisioner := class.Provisioner
		if provisioner == "" || provisioner == notSupportedProvisioner {
			klog.V(4).Infof("storage class %q of claim %q does not support dynamic provisioning", className, pvcName)
			return false, nil, nil
		}

		// Check if the node can satisfy the topology requirement in the class
		if !v1helper.MatchTopologySelectorTerms(class.AllowedTopologies, labels.Set(node.Labels)) {
			klog.V(4).Infof("Node %q cannot satisfy provisioning topology requirements of claim %q", node.Name, pvcName)
			return false, nil, nil
		}

		// TODO: Check if capacity of the node domain in the storage class
		// can satisfy resource requirement of given claim

		provisionedClaims = append(provisionedClaims, claim)
	}
	klog.V(4).Infof("Provisioning for claims of pod %q that has no matching volumes on node %q ...", podName, node.Name)

	return true, provisionedClaims, nil
}

func (b *volumeBinder) revertAssumedPVs(bindings []*bindingInfo) {
	for _, bindingInfo := range bindings {
		b.pvCache.Restore(bindingInfo.pv.Name)
	}
}

func (b *volumeBinder) revertAssumedPVCs(claims []*v1.PersistentVolumeClaim) {
	for _, claim := range claims {
		b.pvcCache.Restore(getPVCName(claim))
	}
}

type bindingInfo struct {
	// Claim that needs to be bound
	pvc *v1.PersistentVolumeClaim

	// Proposed PV to bind to this claim
	pv *v1.PersistentVolume
}

type byPVCSize []*v1.PersistentVolumeClaim

func (a byPVCSize) Len() int {
	return len(a)
}

func (a byPVCSize) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

func (a byPVCSize) Less(i, j int) bool {
	iSize := a[i].Spec.Resources.Requests[v1.ResourceStorage]
	jSize := a[j].Spec.Resources.Requests[v1.ResourceStorage]
	// return true if iSize is less than jSize
	return iSize.Cmp(jSize) == -1
}
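// A minimal sketch of how byPVCSize orders claims (the claim names and sizes
// here are made up for illustration):
//
//	claims := []*v1.PersistentVolumeClaim{tenGi, oneGi, fiveGi}
//	sort.Sort(byPVCSize(claims))
//	// claims is now {oneGi, fiveGi, tenGi}, so findMatchingVolumes tries to
//	// satisfy the smallest requests first and wastes less capacity.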
165
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/scheduler_binder_cache.go
generated
vendored
Normal file
@ -0,0 +1,165 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"sync"

	"k8s.io/api/core/v1"
)

// PodBindingCache stores PV binding decisions per pod per node.
// Pod entries are removed when the Pod is deleted or updated to
// no longer be schedulable.
type PodBindingCache interface {
	// UpdateBindings will update the cache with the given bindings for the
	// pod and node.
	UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo, provisionings []*v1.PersistentVolumeClaim)

	// ClearBindings will clear the cached bindings for the given pod and node.
	ClearBindings(pod *v1.Pod, node string)

	// GetBindings will return the cached bindings for the given pod and node.
	// A nil return value means that the entry was not found. An empty slice
	// means that no binding operations are needed.
	GetBindings(pod *v1.Pod, node string) []*bindingInfo

	// GetProvisionedPVCs will return the cached provisioning decisions for the
	// given pod and node.
	// A nil return value means that the entry was not found. An empty slice
	// means that no provisioning operations are needed.
	GetProvisionedPVCs(pod *v1.Pod, node string) []*v1.PersistentVolumeClaim

	// GetDecisions will return all cached decisions for the given pod.
	GetDecisions(pod *v1.Pod) nodeDecisions

	// DeleteBindings will remove all cached bindings and provisionings for the given pod.
	// TODO: separate the func if it is needed to delete bindings/provisionings individually
	DeleteBindings(pod *v1.Pod)
}

type podBindingCache struct {
	// synchronizes bindingDecisions
	rwMutex sync.RWMutex

	// Key = pod name
	// Value = nodeDecisions
	bindingDecisions map[string]nodeDecisions
}

// Key = nodeName
// Value = bindings & provisioned PVCs of the node
type nodeDecisions map[string]nodeDecision

// A decision includes bindingInfo and provisioned PVCs of the node
type nodeDecision struct {
	bindings      []*bindingInfo
	provisionings []*v1.PersistentVolumeClaim
}

// NewPodBindingCache returns an empty PodBindingCache.
func NewPodBindingCache() PodBindingCache {
	return &podBindingCache{bindingDecisions: map[string]nodeDecisions{}}
}

func (c *podBindingCache) GetDecisions(pod *v1.Pod) nodeDecisions {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()
	podName := getPodName(pod)
	decisions, ok := c.bindingDecisions[podName]
	if !ok {
		return nil
	}
	return decisions
}

func (c *podBindingCache) DeleteBindings(pod *v1.Pod) {
	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	podName := getPodName(pod)

	if _, ok := c.bindingDecisions[podName]; ok {
		delete(c.bindingDecisions, podName)
		VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
	}
}

func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo, pvcs []*v1.PersistentVolumeClaim) {
	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	podName := getPodName(pod)
	decisions, ok := c.bindingDecisions[podName]
	if !ok {
		decisions = nodeDecisions{}
		c.bindingDecisions[podName] = decisions
	}
	decision, ok := decisions[node]
	if !ok {
		decision = nodeDecision{
			bindings:      bindings,
			provisionings: pvcs,
		}
		VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
	} else {
		decision.bindings = bindings
		decision.provisionings = pvcs
	}
	decisions[node] = decision
}

func (c *podBindingCache) GetBindings(pod *v1.Pod, node string) []*bindingInfo {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()

	podName := getPodName(pod)
	decisions, ok := c.bindingDecisions[podName]
	if !ok {
		return nil
	}
	decision, ok := decisions[node]
	if !ok {
		return nil
	}
	return decision.bindings
}

func (c *podBindingCache) GetProvisionedPVCs(pod *v1.Pod, node string) []*v1.PersistentVolumeClaim {
	c.rwMutex.RLock()
	defer c.rwMutex.RUnlock()

	podName := getPodName(pod)
	decisions, ok := c.bindingDecisions[podName]
	if !ok {
		return nil
	}
	decision, ok := decisions[node]
	if !ok {
		return nil
	}
	return decision.provisionings
}

func (c *podBindingCache) ClearBindings(pod *v1.Pod, node string) {
	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()

	podName := getPodName(pod)
	decisions, ok := c.bindingDecisions[podName]
	if !ok {
		return
	}
	delete(decisions, node)
}
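// Illustrative usage of the cache above (the pod and node names are made up):
//
//	cache := NewPodBindingCache()
//	cache.UpdateBindings(pod, "node-1", bindings, provisionings)
//	cache.GetBindings(pod, "node-1") // returns the cached bindings
//	cache.GetBindings(pod, "node-2") // returns nil: no decision for that node
//	cache.DeleteBindings(pod)        // drops every decision for the pod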
60
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/scheduler_binder_fake.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import "k8s.io/api/core/v1"

// FakeVolumeBinderConfig holds the canned results returned by FakeVolumeBinder.
type FakeVolumeBinderConfig struct {
	AllBound             bool
	FindUnboundSatsified bool
	FindBoundSatsified   bool
	FindErr              error
	AssumeErr            error
	BindErr              error
}

// NewFakeVolumeBinder sets up a fake volume binder that returns the
// pre-configured results, for testing the scheduler without real caches.
func NewFakeVolumeBinder(config *FakeVolumeBinderConfig) *FakeVolumeBinder {
	return &FakeVolumeBinder{
		config: config,
	}
}

type FakeVolumeBinder struct {
	config       *FakeVolumeBinderConfig
	AssumeCalled bool
	BindCalled   bool
}

func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatsified bool, err error) {
	return b.config.FindUnboundSatsified, b.config.FindBoundSatsified, b.config.FindErr
}

func (b *FakeVolumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (bool, error) {
	b.AssumeCalled = true
	return b.config.AllBound, b.config.AssumeErr
}

func (b *FakeVolumeBinder) BindPodVolumes(assumedPod *v1.Pod) error {
	b.BindCalled = true
	return b.config.BindErr
}

func (b *FakeVolumeBinder) GetBindingsCache() PodBindingCache {
	return nil
}
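// Sketch of how a scheduler test might drive the fake (the field values are
// arbitrary):
//
//	binder := NewFakeVolumeBinder(&FakeVolumeBinderConfig{
//		FindUnboundSatsified: true,
//		FindBoundSatsified:   true,
//	})
//	_, _, _ = binder.FindPodVolumes(pod, node)
//	_ = binder.BindPodVolumes(pod) // binder.BindCalled is now true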
103
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/util.go
generated
vendored
Normal file
@ -0,0 +1,103 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"

	"k8s.io/api/core/v1"
	storage "k8s.io/api/storage/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/scheme"
	storagelisters "k8s.io/client-go/listers/storage/v1"
	"k8s.io/client-go/tools/reference"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

// IsDelayBindingMode checks if claim is in delay binding mode.
func IsDelayBindingMode(claim *v1.PersistentVolumeClaim, classLister storagelisters.StorageClassLister) (bool, error) {
	className := v1helper.GetPersistentVolumeClaimClass(claim)
	if className == "" {
		return false, nil
	}

	class, err := classLister.Get(className)
	if err != nil {
		return false, nil
	}

	if class.VolumeBindingMode == nil {
		return false, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", className)
	}

	return *class.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer, nil
}
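// For reference, a claim is in delay binding mode when its StorageClass sets
// volumeBindingMode: WaitForFirstConsumer. An illustrative class definition
// (the name and provisioner are hypothetical):
//
//	apiVersion: storage.k8s.io/v1
//	kind: StorageClass
//	metadata:
//	  name: topology-aware
//	provisioner: example.com/provisioner
//	volumeBindingMode: WaitForFirstConsumer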

// GetBindVolumeToClaim returns a new volume which is bound to the given claim.
// In addition, it returns a bool which indicates whether we made a
// modification to the original volume.
func GetBindVolumeToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) (*v1.PersistentVolume, bool, error) {
	dirty := false

	// Check if the volume was already bound (either by user or by controller)
	shouldSetBoundByController := false
	if !IsVolumeBoundToClaim(volume, claim) {
		shouldSetBoundByController = true
	}

	// The volume from method args can be pointing to watcher cache. We must not
	// modify these, therefore create a copy.
	volumeClone := volume.DeepCopy()

	// Bind the volume to the claim if it is not bound yet
	if volume.Spec.ClaimRef == nil ||
		volume.Spec.ClaimRef.Name != claim.Name ||
		volume.Spec.ClaimRef.Namespace != claim.Namespace ||
		volume.Spec.ClaimRef.UID != claim.UID {

		claimRef, err := reference.GetReference(scheme.Scheme, claim)
		if err != nil {
			return nil, false, fmt.Errorf("unexpected error getting claim reference: %v", err)
		}
		volumeClone.Spec.ClaimRef = claimRef
		dirty = true
	}

	// Set annBoundByController if it is not set yet
	if shouldSetBoundByController && !metav1.HasAnnotation(volumeClone.ObjectMeta, annBoundByController) {
		metav1.SetMetaDataAnnotation(&volumeClone.ObjectMeta, annBoundByController, "yes")
		dirty = true
	}

	return volumeClone, dirty, nil
}

// IsVolumeBoundToClaim returns true if the given volume is pre-bound or bound
// to the specific claim. Both claim.Name and claim.Namespace must be equal.
// If claim.UID is present in volume.Spec.ClaimRef, it must be equal too.
func IsVolumeBoundToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) bool {
	if volume.Spec.ClaimRef == nil {
		return false
	}
	if claim.Name != volume.Spec.ClaimRef.Name || claim.Namespace != volume.Spec.ClaimRef.Namespace {
		return false
	}
	if volume.Spec.ClaimRef.UID != "" && claim.UID != volume.Spec.ClaimRef.UID {
		return false
	}
	return true
}
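// Illustrative outcomes of IsVolumeBoundToClaim (the scenarios are made up):
//
//	ClaimRef == nil                                 -> false
//	ClaimRef.Name/Namespace differ from the claim's -> false
//	ClaimRef.UID == "" (pre-bound by user)          -> true
//	ClaimRef.UID set and equal to claim.UID         -> true
//	ClaimRef.UID set but different (stale binding)  -> false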
138
vendor/k8s.io/kubernetes/pkg/controller/volume/persistentvolume/volume_host.go
generated
vendored
Normal file
@ -0,0 +1,138 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package persistentvolume

import (
	"fmt"
	"net"

	authenticationv1 "k8s.io/api/authentication/v1"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/record"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/util/mount"
	vol "k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/volume/util/subpath"
)

// VolumeHost interface implementation for PersistentVolumeController.

var _ vol.VolumeHost = &PersistentVolumeController{}
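// The blank-identifier assignment above is the usual Go compile-time check
// that *PersistentVolumeController satisfies vol.VolumeHost: if any interface
// method is missing, compilation fails here rather than at a distant call
// site. Most of the implementations below are stubs because the PV controller
// never mounts volumes itself.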

func (ctrl *PersistentVolumeController) GetPluginDir(pluginName string) string {
	return ""
}

func (ctrl *PersistentVolumeController) GetVolumeDevicePluginDir(pluginName string) string {
	return ""
}

func (ctrl *PersistentVolumeController) GetPodsDir() string {
	return ""
}

func (ctrl *PersistentVolumeController) GetPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
	return ""
}

func (ctrl *PersistentVolumeController) GetPodPluginDir(podUID types.UID, pluginName string) string {
	return ""
}

func (ctrl *PersistentVolumeController) GetPodVolumeDeviceDir(podUID types.UID, pluginName string) string {
	return ""
}

func (ctrl *PersistentVolumeController) GetKubeClient() clientset.Interface {
	return ctrl.kubeClient
}

func (ctrl *PersistentVolumeController) NewWrapperMounter(volName string, spec vol.Spec, pod *v1.Pod, opts vol.VolumeOptions) (vol.Mounter, error) {
	return nil, fmt.Errorf("PersistentVolumeController.NewWrapperMounter is not implemented")
}

func (ctrl *PersistentVolumeController) NewWrapperUnmounter(volName string, spec vol.Spec, podUID types.UID) (vol.Unmounter, error) {
	return nil, fmt.Errorf("PersistentVolumeController.NewWrapperUnmounter is not implemented")
}

func (ctrl *PersistentVolumeController) GetCloudProvider() cloudprovider.Interface {
	return ctrl.cloud
}

func (ctrl *PersistentVolumeController) GetMounter(pluginName string) mount.Interface {
	return nil
}

func (ctrl *PersistentVolumeController) GetHostName() string {
	return ""
}

func (ctrl *PersistentVolumeController) GetHostIP() (net.IP, error) {
	return nil, fmt.Errorf("PersistentVolumeController.GetHostIP() is not implemented")
}

func (ctrl *PersistentVolumeController) GetNodeAllocatable() (v1.ResourceList, error) {
	return v1.ResourceList{}, nil
}

func (ctrl *PersistentVolumeController) GetSecretFunc() func(namespace, name string) (*v1.Secret, error) {
	return func(_, _ string) (*v1.Secret, error) {
		return nil, fmt.Errorf("GetSecret unsupported in PersistentVolumeController")
	}
}

func (ctrl *PersistentVolumeController) GetConfigMapFunc() func(namespace, name string) (*v1.ConfigMap, error) {
	return func(_, _ string) (*v1.ConfigMap, error) {
		return nil, fmt.Errorf("GetConfigMap unsupported in PersistentVolumeController")
	}
}

func (ctrl *PersistentVolumeController) GetServiceAccountTokenFunc() func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) {
	return func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) {
		return nil, fmt.Errorf("GetServiceAccountToken unsupported in PersistentVolumeController")
	}
}

func (ctrl *PersistentVolumeController) DeleteServiceAccountTokenFunc() func(types.UID) {
	return func(types.UID) {
		klog.Errorf("DeleteServiceAccountToken unsupported in PersistentVolumeController")
	}
}

func (ctrl *PersistentVolumeController) GetExec(pluginName string) mount.Exec {
	return mount.NewOsExec()
}

func (ctrl *PersistentVolumeController) GetNodeLabels() (map[string]string, error) {
	return nil, fmt.Errorf("GetNodeLabels() unsupported in PersistentVolumeController")
}

func (ctrl *PersistentVolumeController) GetNodeName() types.NodeName {
	return ""
}

func (ctrl *PersistentVolumeController) GetEventRecorder() record.EventRecorder {
	return ctrl.eventRecorder
}

func (ctrl *PersistentVolumeController) GetSubpather() subpath.Interface {
	// No volume plugin needs Subpaths in the PV controller.
	return nil
}