Migrate from snapClient.VolumesnapshotV1alpha1Client to

snapClient.SnapshotV1alpha1Client and also update kube dependency

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
This commit is contained in:
Humble Chirammal
2019-06-24 14:38:09 +05:30
committed by mergify[bot]
parent 3bc6771df8
commit 22ff5c0911
1031 changed files with 34242 additions and 177906 deletions

View File

@ -108,7 +108,7 @@ type SAControllerClientBuilder struct {
// config returns a complete clientConfig for constructing clients. This is separate in anticipation of composition
// which means that not all clientsets are known here
func (b SAControllerClientBuilder) Config(name string) (*restclient.Config, error) {
sa, err := b.getOrCreateServiceAccount(name)
sa, err := getOrCreateServiceAccount(b.CoreClient, b.Namespace, name)
if err != nil {
return nil, err
}
@ -177,30 +177,6 @@ func (b SAControllerClientBuilder) Config(name string) (*restclient.Config, erro
return clientConfig, nil
}
// getOrCreateServiceAccount looks up the service account called name in the
// builder's namespace and returns it. When the lookup reports "not found" the
// account is created, after a best-effort attempt to create the namespace.
// Any other lookup error is returned unchanged.
func (b SAControllerClientBuilder) getOrCreateServiceAccount(name string) (*v1.ServiceAccount, error) {
	existing, getErr := b.CoreClient.ServiceAccounts(b.Namespace).Get(name, metav1.GetOptions{})
	switch {
	case getErr == nil:
		return existing, nil
	case !apierrors.IsNotFound(getErr):
		return nil, getErr
	}

	// The namespace could not be verified, so try to create it. The result is
	// deliberately ignored: this component may not be allowed to create
	// namespaces, and the service account creation below reports real failures.
	_, nsErr := b.CoreClient.Namespaces().Get(b.Namespace, metav1.GetOptions{})
	if nsErr != nil {
		wantedNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: b.Namespace}}
		b.CoreClient.Namespaces().Create(wantedNamespace)
	}

	wantedAccount := &v1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Namespace: b.Namespace, Name: name}}
	created, createErr := b.CoreClient.ServiceAccounts(b.Namespace).Create(wantedAccount)
	if !apierrors.IsAlreadyExists(createErr) {
		return created, createErr
	}
	// Somebody else created the account while we were initializing; read it back.
	return b.CoreClient.ServiceAccounts(b.Namespace).Get(name, metav1.GetOptions{})
}
func (b SAControllerClientBuilder) getAuthenticatedConfig(sa *v1.ServiceAccount, token string) (*restclient.Config, bool, error) {
username := apiserverserviceaccount.MakeUsername(sa.Namespace, sa.Name)

View File

@ -0,0 +1,217 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"fmt"
"net/http"
"sync"
"time"
"golang.org/x/oauth2"
v1authenticationapi "k8s.io/api/authentication/v1"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/wait"
apiserverserviceaccount "k8s.io/apiserver/pkg/authentication/serviceaccount"
clientset "k8s.io/client-go/kubernetes"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/transport"
"k8s.io/klog"
utilpointer "k8s.io/utils/pointer"
)
var (
// defaultExpirationSeconds is the token lifetime, in seconds, that
// NewDynamicClientBuilder configures for each TokenRequest.
defaultExpirationSeconds = int64(3600)
// defaultLeewayPercent is the percentage of a token's lifetime that is
// subtracted from its expiry, so that the client rotates the token before it
// actually expires. Valid range: [0, 100].
defaultLeewayPercent = 20
)
// DynamicControllerClientBuilder builds client configs for controller service
// accounts. Each config's transport is wrapped with a cached token source that
// requests tokens for the service account (see Config).
type DynamicControllerClientBuilder struct {
// ClientConfig is a skeleton config to clone and use as the basis for each controller client
ClientConfig *restclient.Config
// CoreClient is used to provision service accounts if needed and watch for their associated tokens
// to construct a controller client
CoreClient v1core.CoreV1Interface
// Namespace is the namespace used to host the service accounts that will back the
// controllers. It must be highly privileged namespace which normal users cannot inspect.
Namespace string
// roundTripperFuncMap is a cache that stores the transport wrapper function
// for each service account, keyed by service account name
roundTripperFuncMap map[string]func(http.RoundTripper) http.RoundTripper
// expirationSeconds defines the token expiration seconds
expirationSeconds int64
// leewayPercent defines the percentage of expiration left before the client trigger a token rotation.
leewayPercent int
// mutex is held by Config while roundTripperFuncMap is read and written
mutex sync.Mutex
// clock is set to clock.RealClock{} by both constructors.
// NOTE(review): no reader of this field is visible in this file — confirm it is still needed.
clock clock.Clock
}
// NewDynamicClientBuilder returns a ControllerClientBuilder backed by a
// DynamicControllerClientBuilder that starts with an empty transport-wrapper
// cache, the package default token expiration and leeway, and the real clock.
func NewDynamicClientBuilder(clientConfig *restclient.Config, coreClient v1core.CoreV1Interface, ns string) ControllerClientBuilder {
	return &DynamicControllerClientBuilder{
		ClientConfig:        clientConfig,
		CoreClient:          coreClient,
		Namespace:           ns,
		roundTripperFuncMap: make(map[string]func(http.RoundTripper) http.RoundTripper),
		expirationSeconds:   defaultExpirationSeconds,
		leewayPercent:       defaultLeewayPercent,
		clock:               clock.RealClock{},
	}
}
// NewTestDynamicClientBuilder is meant for tests only; do not call it from
// production code. It behaves like NewDynamicClientBuilder except that the
// caller chooses the token expiration (in seconds) and the leeway percentage.
func NewTestDynamicClientBuilder(clientConfig *restclient.Config, coreClient v1core.CoreV1Interface, ns string, expirationSeconds int64, leewayPercent int) ControllerClientBuilder {
	return &DynamicControllerClientBuilder{
		ClientConfig:        clientConfig,
		CoreClient:          coreClient,
		Namespace:           ns,
		roundTripperFuncMap: make(map[string]func(http.RoundTripper) http.RoundTripper),
		expirationSeconds:   expirationSeconds,
		leewayPercent:       leewayPercent,
		clock:               clock.RealClock{},
	}
}
// Config returns a client config for the service account saName. The service
// account is looked up (and created if missing) first. The returned config is
// built by constructClient and its WrapTransport is set to a token-source
// wrapper that is created once per service account and cached afterwards.
func (t *DynamicControllerClientBuilder) Config(saName string) (*restclient.Config, error) {
	if _, err := getOrCreateServiceAccount(t.CoreClient, t.Namespace, saName); err != nil {
		return nil, err
	}

	cfg := constructClient(t.Namespace, saName, t.ClientConfig)

	// The wrapper cache is shared by all callers of this builder.
	t.mutex.Lock()
	defer t.mutex.Unlock()

	wrap, cached := t.roundTripperFuncMap[saName]
	if !cached {
		source := &tokenSourceImpl{
			namespace:          t.Namespace,
			serviceAccountName: saName,
			coreClient:         t.CoreClient,
			expirationSeconds:  t.expirationSeconds,
			leewayPercent:      t.leewayPercent,
		}
		wrap = transport.TokenSourceWrapTransport(transport.NewCachedTokenSource(source))
		t.roundTripperFuncMap[saName] = wrap
	}
	cfg.WrapTransport = wrap

	return &cfg, nil
}
// ConfigOrDie is like Config but reports any error through klog.Fatal instead
// of returning it.
func (t *DynamicControllerClientBuilder) ConfigOrDie(name string) *restclient.Config {
	cfg, err := t.Config(name)
	if err == nil {
		return cfg
	}
	klog.Fatal(err)
	return cfg
}
// Client builds a clientset from the config that Config produces for the
// service account name. An error from Config is returned as-is.
func (t *DynamicControllerClientBuilder) Client(name string) (clientset.Interface, error) {
	cfg, cfgErr := t.Config(name)
	if cfgErr != nil {
		return nil, cfgErr
	}

	return clientset.NewForConfig(cfg)
}
// ClientOrDie is like Client but reports any error through klog.Fatal instead
// of returning it.
func (t *DynamicControllerClientBuilder) ClientOrDie(name string) clientset.Interface {
	cs, err := t.Client(name)
	if err == nil {
		return cs
	}
	klog.Fatal(err)
	return cs
}
// tokenSourceImpl produces oauth2 tokens for one service account by issuing
// TokenRequests through coreClient (see Token).
type tokenSourceImpl struct {
// namespace is the namespace of the service account
namespace string
// serviceAccountName is the service account tokens are requested for
serviceAccountName string
// coreClient is used to get or create the service account and to create tokens
coreClient v1core.CoreV1Interface
// expirationSeconds is the expiration requested in each TokenRequest
expirationSeconds int64
// leewayPercent is the percentage of the token lifetime subtracted from the reported expiry
leewayPercent int
}
// Token requests a fresh token for the service account and returns it as a
// Bearer oauth2.Token.
//
// The request is retried with exponential backoff (500ms initial delay, factor
// 2, 4 steps); each failed attempt is logged as a warning. The reported expiry
// is now + lifetime - leeway, where lifetime is the time left until the
// returned expiration timestamp (at least 10 minutes) and leeway is
// leewayPercent percent of that lifetime.
func (ts *tokenSourceImpl) Token() (*oauth2.Token, error) {
	const minLifetime = 10 * time.Minute

	var issued *v1authenticationapi.TokenRequest

	// attempt never returns an error itself, so the backoff keeps retrying
	// until it succeeds or runs out of steps.
	attempt := func() (bool, error) {
		_, saErr := getOrCreateServiceAccount(ts.coreClient, ts.namespace, ts.serviceAccountName)
		if saErr != nil {
			klog.Warningf("get or create service account failed: %v", saErr)
			return false, nil
		}

		request := &v1authenticationapi.TokenRequest{
			Spec: v1authenticationapi.TokenRequestSpec{
				ExpirationSeconds: utilpointer.Int64Ptr(ts.expirationSeconds),
			},
		}
		response, tokenErr := ts.coreClient.ServiceAccounts(ts.namespace).CreateToken(ts.serviceAccountName, request)
		if tokenErr != nil {
			klog.Warningf("get token failed: %v", tokenErr)
			return false, nil
		}

		issued = response
		return true, nil
	}

	retry := wait.Backoff{
		Duration: 500 * time.Millisecond,
		Factor:   2, // double the timeout for every failure
		Steps:    4,
	}
	if err := wait.ExponentialBackoff(retry, attempt); err != nil {
		return nil, fmt.Errorf("failed to get token for %s/%s: %v", ts.namespace, ts.serviceAccountName, err)
	}

	if issued.Spec.ExpirationSeconds == nil {
		return nil, fmt.Errorf("nil pointer of expiration in token request")
	}

	lifetime := time.Until(issued.Status.ExpirationTimestamp.Time)
	if lifetime < minLifetime {
		// possible clock skew issue, pin to minimum token lifetime
		lifetime = minLifetime
	}
	leeway := time.Duration(int64(lifetime) * int64(ts.leewayPercent) / 100)

	return &oauth2.Token{
		AccessToken: issued.Status.Token,
		TokenType:   "Bearer",
		Expiry:      time.Now().Add(lifetime).Add(-1 * leeway),
	}, nil
}
// constructClient returns a copy of the config produced by
// restclient.AnonymousClientConfig(config), with the service account username
// for saNamespace/saName registered through restclient.AddUserAgent.
func constructClient(saNamespace, saName string, config *restclient.Config) restclient.Config {
	userAgent := apiserverserviceaccount.MakeUsername(saNamespace, saName)

	anonymous := restclient.AnonymousClientConfig(config)
	clientCfg := *anonymous
	restclient.AddUserAgent(&clientCfg, userAgent)

	return clientCfg
}

View File

@ -26,7 +26,7 @@ import (
"time"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -40,6 +40,7 @@ import (
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
clientretry "k8s.io/client-go/util/retry"
@ -159,11 +160,11 @@ type ControllerExpectations struct {
// GetExpectations returns the ControlleeExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
exp, exists, err := r.GetByKey(controllerKey)
if err == nil && exists {
return exp.(*ControlleeExpectations), true, nil
} else {
return nil, false, err
}
return nil, false, err
}
// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
@ -575,18 +576,19 @@ func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodT
if labels.Set(pod.Labels).AsSelectorPreValidated().Empty() {
return fmt.Errorf("unable to create pods, no labels")
}
if newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod); err != nil {
newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod)
if err != nil {
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
return err
} else {
accessor, err := meta.Accessor(object)
if err != nil {
klog.Errorf("parentObject does not have ObjectMeta, %v", err)
return nil
}
klog.V(4).Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name)
r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)
}
accessor, err := meta.Accessor(object)
if err != nil {
klog.Errorf("parentObject does not have ObjectMeta, %v", err)
return nil
}
klog.V(4).Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name)
r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)
return nil
}
@ -1096,3 +1098,29 @@ func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, la
return nil
})
}
// getOrCreateServiceAccount returns the service account name in namespace,
// creating it when the lookup reports "not found". Before creating the account
// it makes a best-effort attempt to create the namespace if that is reported
// missing too. Lookup errors other than "not found" are returned unchanged.
func getOrCreateServiceAccount(coreClient v1core.CoreV1Interface, namespace, name string) (*v1.ServiceAccount, error) {
	found, getErr := coreClient.ServiceAccounts(namespace).Get(name, metav1.GetOptions{})
	switch {
	case getErr == nil:
		return found, nil
	case !apierrors.IsNotFound(getErr):
		return nil, getErr
	}

	// Create the namespace if it is reported missing. Failures are only logged,
	// since we don't know whether this component may create namespaces.
	if _, nsErr := coreClient.Namespaces().Get(namespace, metav1.GetOptions{}); apierrors.IsNotFound(nsErr) {
		wantedNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
		_, nsErr = coreClient.Namespaces().Create(wantedNamespace)
		if nsErr != nil && !apierrors.IsAlreadyExists(nsErr) {
			klog.Warningf("create non-exist namespace %s failed:%v", namespace, nsErr)
		}
	}

	wantedAccount := &v1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}}
	created, createErr := coreClient.ServiceAccounts(namespace).Create(wantedAccount)
	if !apierrors.IsAlreadyExists(createErr) {
		return created, createErr
	}
	// We lost an initialization race to another creator; fetch what it made.
	return coreClient.ServiceAccounts(namespace).Get(name, metav1.GetOptions{})
}

View File

@ -227,7 +227,7 @@ func Revision(obj runtime.Object) (int64, error) {
// SetNewReplicaSetAnnotations sets new replica set's annotations appropriately by updating its revision and
// copying required deployment annotations to it; it returns true if replica set's annotation is changed.
func SetNewReplicaSetAnnotations(deployment *apps.Deployment, newRS *apps.ReplicaSet, newRevision string, exists bool) bool {
func SetNewReplicaSetAnnotations(deployment *apps.Deployment, newRS *apps.ReplicaSet, newRevision string, exists bool, revHistoryLimitInChars int) bool {
// First, copy deployment's annotations (except for apply and revision annotations)
annotationChanged := copyDeploymentAnnotationsToReplicaSet(deployment, newRS)
// Then, update replica set's revision annotation
@ -261,14 +261,25 @@ func SetNewReplicaSetAnnotations(deployment *apps.Deployment, newRS *apps.Replic
// If a revision annotation already existed and this replica set was updated with a new revision
// then that means we are rolling back to this replica set. We need to preserve the old revisions
// for historical information.
if ok && annotationChanged {
if ok && oldRevisionInt < newRevisionInt {
revisionHistoryAnnotation := newRS.Annotations[RevisionHistoryAnnotation]
oldRevisions := strings.Split(revisionHistoryAnnotation, ",")
if len(oldRevisions[0]) == 0 {
newRS.Annotations[RevisionHistoryAnnotation] = oldRevision
} else {
oldRevisions = append(oldRevisions, oldRevision)
newRS.Annotations[RevisionHistoryAnnotation] = strings.Join(oldRevisions, ",")
totalLen := len(revisionHistoryAnnotation) + len(oldRevision) + 1
// index for the starting position in oldRevisions
start := 0
for totalLen > revHistoryLimitInChars && start < len(oldRevisions) {
totalLen = totalLen - len(oldRevisions[start]) - 1
start++
}
if totalLen <= revHistoryLimitInChars {
oldRevisions = append(oldRevisions[start:], oldRevision)
newRS.Annotations[RevisionHistoryAnnotation] = strings.Join(oldRevisions, ",")
} else {
klog.Warningf("Not appending revision due to length limit of %v reached", revHistoryLimitInChars)
}
}
}
// If the new replica set is about to be created, we need to add replica annotations to it.

View File

@ -0,0 +1,56 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/dynamic/dynamicinformer"
"k8s.io/client-go/informers"
)
// InformerFactory creates informers for each group version resource.
type InformerFactory interface {
// ForResource returns a generic informer for the given group version resource.
ForResource(resource schema.GroupVersionResource) (informers.GenericInformer, error)
// Start starts the factory's informers; stopCh is handed to the underlying factories.
Start(stopCh <-chan struct{})
}
// informerFactory implements InformerFactory on top of a typed shared informer
// factory, with a dynamic shared informer factory as the fallback.
type informerFactory struct {
// typedInformerFactory is consulted first by ForResource
typedInformerFactory informers.SharedInformerFactory
// dynamicInformerFactory is used when the typed factory returns an error for a resource
dynamicInformerFactory dynamicinformer.DynamicSharedInformerFactory
}
// ForResource returns the typed informer for resource when the typed factory
// can provide one. If the typed factory returns an error, that error is
// dropped and the dynamic factory's informer is returned instead, so the
// returned error is always nil.
func (i *informerFactory) ForResource(resource schema.GroupVersionResource) (informers.GenericInformer, error) {
	if typed, err := i.typedInformerFactory.ForResource(resource); err == nil {
		return typed, nil
	}
	return i.dynamicInformerFactory.ForResource(resource), nil
}
// Start starts the typed informer factory and then the dynamic informer
// factory, passing the same stopCh to both.
func (i *informerFactory) Start(stopCh <-chan struct{}) {
i.typedInformerFactory.Start(stopCh)
i.dynamicInformerFactory.Start(stopCh)
}
// NewInformerFactory returns an InformerFactory that serves typed resources
// from typedInformerFactory and falls back to dynamicInformerFactory for
// everything else.
func NewInformerFactory(typedInformerFactory informers.SharedInformerFactory, dynamicInformerFactory dynamicinformer.DynamicSharedInformerFactory) InformerFactory {
	factory := new(informerFactory)
	factory.typedInformerFactory = typedInformerFactory
	factory.dynamicInformerFactory = dynamicInformerFactory
	return factory
}

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package job contains logic for watching and synchronizing jobs.
package job // import "k8s.io/kubernetes/pkg/controller/job"

View File

@ -1,871 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package job
import (
"fmt"
"math"
"reflect"
"sort"
"sync"
"time"
batch "k8s.io/api/batch/v1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
batchinformers "k8s.io/client-go/informers/batch/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
batchv1listers "k8s.io/client-go/listers/batch/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/util/metrics"
"k8s.io/utils/integer"
"k8s.io/klog"
)
// statusUpdateRetries is presumably the number of attempts made when updating a
// job's status; its use is not visible in this part of the file — TODO confirm.
const statusUpdateRetries = 3
// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
var (
// DefaultJobBackOff is the base (initial) backoff period passed to the job
// queue's exponential failure rate limiter, exported for the e2e test
DefaultJobBackOff = 10 * time.Second
// MaxJobBackOff is the max backoff period, exported for the e2e test
MaxJobBackOff = 360 * time.Second
)
// JobController holds the clients, listers, expectations and rate-limited work
// queue used to sync Job objects with their pods.
type JobController struct {
// kubeClient is the clientset used for uncached reads of Jobs
kubeClient clientset.Interface
// podControl is handed to the pod controller-ref manager when claiming pods
podControl controller.PodControlInterface
// To allow injection of updateJobStatus for testing.
updateHandler func(job *batch.Job) error
// syncHandler processes a single job key (syncJob by default). The boolean
// result tells the worker whether to Forget the key in the rate limiter.
syncHandler func(jobKey string) (bool, error)
// podStoreSynced returns true if the pod store has been synced at least once.
// Added as a member to the struct to allow injection for testing.
podStoreSynced cache.InformerSynced
// jobStoreSynced returns true if the job store has been synced at least once.
// Added as a member to the struct to allow injection for testing.
jobStoreSynced cache.InformerSynced
// A TTLCache of pod creates/deletes each rc expects to see
expectations controller.ControllerExpectationsInterface
// A store of jobs
jobLister batchv1listers.JobLister
// A store of pods, populated by the podController
podStore corelisters.PodLister
// Jobs that need to be updated
queue workqueue.RateLimitingInterface
// recorder emits events with the "job-controller" component as source
recorder record.EventRecorder
}
// NewJobController builds a JobController wired to the given pod and job
// informers. It sets up event broadcasting, registers event handlers on both
// informers, and installs updateJobStatus and syncJob as the default
// update and sync handlers.
func NewJobController(podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface) *JobController {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(klog.Infof)
// NOTE(review): kubeClient is dereferenced here, before the nil check on the
// next statement — a nil kubeClient would panic on this line. Confirm intent.
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
metrics.RegisterMetricAndTrackRateLimiterUsage("job_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter())
}
jm := &JobController{
kubeClient: kubeClient,
podControl: controller.RealPodControl{
KubeClient: kubeClient,
Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
},
expectations: controller.NewControllerExpectations(),
// Failed keys are retried with exponential backoff between DefaultJobBackOff and MaxJobBackOff.
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job"),
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
}
// Job add and delete events enqueue the job immediately; updates go through updateJob.
jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
jm.enqueueController(obj, true)
},
UpdateFunc: jm.updateJob,
DeleteFunc: func(obj interface{}) {
jm.enqueueController(obj, true)
},
})
jm.jobLister = jobInformer.Lister()
jm.jobStoreSynced = jobInformer.Informer().HasSynced
// Pod events are routed to the handlers that locate and enqueue the owning job.
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: jm.addPod,
UpdateFunc: jm.updatePod,
DeleteFunc: jm.deletePod,
})
jm.podStore = podInformer.Lister()
jm.podStoreSynced = podInformer.Informer().HasSynced
jm.updateHandler = jm.updateJobStatus
jm.syncHandler = jm.syncJob
return jm
}
// Run the main goroutine responsible for watching and syncing jobs.
// It waits for the pod and job caches to sync, starts `workers` worker
// goroutines, and then blocks until stopCh is signalled.
func (jm *JobController) Run(workers int, stopCh <-chan struct{}) {
// Deferred calls run in reverse order on return: the shutdown log line first,
// then the queue shutdown, then the crash handler.
defer utilruntime.HandleCrash()
defer jm.queue.ShutDown()
klog.Infof("Starting job controller")
defer klog.Infof("Shutting down job controller")
// Give up without starting workers if the caches did not sync.
if !controller.WaitForCacheSync("job", stopCh, jm.podStoreSynced, jm.jobStoreSynced) {
return
}
for i := 0; i < workers; i++ {
go wait.Until(jm.worker, time.Second, stopCh)
}
<-stopCh
}
// getPodJobs returns pointers to the Jobs that the job lister reports as
// potentially matching pod, or nil when the lister returns an error. More than
// one match is reported as a user error but the full list is still returned.
func (jm *JobController) getPodJobs(pod *v1.Pod) []*batch.Job {
	matched, err := jm.jobLister.GetPodJobs(pod)
	if err != nil {
		return nil
	}

	if len(matched) > 1 {
		// ControllerRef will ensure we don't do anything crazy, but more than one
		// item in this list nevertheless constitutes user error.
		utilruntime.HandleError(fmt.Errorf("user error! more than one job is selecting pods with labels: %+v", pod.Labels))
	}

	jobPtrs := make([]*batch.Job, len(matched))
	for idx := range matched {
		jobPtrs[idx] = &matched[idx]
	}
	return jobPtrs
}
// resolveControllerRef returns the Job that controllerRef points to in
// namespace. It returns nil when the reference has a different Kind, when no
// Job with that name can be fetched from the lister, or when the Job found has
// a different UID than the reference.
func (jm *JobController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *batch.Job {
	// Lookup by UID is not possible, so only look up by Name when the Kind
	// matches, and verify the UID afterwards.
	if controllerRef.Kind != controllerKind.Kind {
		return nil
	}

	candidate, err := jm.jobLister.Jobs(namespace).Get(controllerRef.Name)
	if err != nil || candidate.UID != controllerRef.UID {
		// Either nothing was found, or the Job with this Name is not the one
		// the ControllerRef points to.
		return nil
	}
	return candidate
}
// addPod handles pod creation: it enqueues the job that controls the pod and
// records the creation against that job's expectations. obj must be a *v1.Pod.
func (jm *JobController) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)
	if pod.DeletionTimestamp != nil {
		// After a controller restart a new pod may show up already pending
		// deletion; treat it as a deletion, not as a creation observation.
		jm.deletePod(pod)
		return
	}

	ownerRef := metav1.GetControllerOf(pod)
	if ownerRef == nil {
		// An orphan: sync every matching job so one of them may adopt it.
		// DO NOT observe creation because no controller should be waiting for
		// an orphan.
		for _, candidate := range jm.getPodJobs(pod) {
			jm.enqueueController(candidate, true)
		}
		return
	}

	// With a ControllerRef present, only the referenced job matters.
	owner := jm.resolveControllerRef(pod.Namespace, ownerRef)
	if owner == nil {
		return
	}
	ownerKey, err := controller.KeyFunc(owner)
	if err != nil {
		return
	}
	jm.expectations.CreationObserved(ownerKey)
	jm.enqueueController(owner, true)
}
// updatePod handles pod updates by waking up the job(s) that manage the pod.
// When the pod's ControllerRef changed, the previous owner is woken as well;
// for orphans, matching jobs are woken when labels or the ControllerRef
// changed. old and cur must be *v1.Pod types.
func (jm *JobController) updatePod(old, cur interface{}) {
	curPod := cur.(*v1.Pod)
	oldPod := old.(*v1.Pod)
	if oldPod.ResourceVersion == curPod.ResourceVersion {
		// Periodic resync will send update events for all known pods.
		// Two different versions of the same pod will always have different RVs.
		return
	}
	if curPod.DeletionTimestamp != nil {
		// A graceful delete first sets the deletion timestamp and the pod is
		// only removed from the store later. The job should create replacement
		// pods right away, so handle this update as a deletion.
		jm.deletePod(curPod)
		return
	}

	// Backoff is applied only when the pod failed.
	podFailed := curPod.Status.Phase == v1.PodFailed
	immediate := !podFailed

	newRef := metav1.GetControllerOf(curPod)
	prevRef := metav1.GetControllerOf(oldPod)
	refChanged := !reflect.DeepEqual(newRef, prevRef)

	if prevRef != nil && refChanged {
		// The ControllerRef was changed. Sync the old controller, if any.
		if prevOwner := jm.resolveControllerRef(oldPod.Namespace, prevRef); prevOwner != nil {
			jm.enqueueController(prevOwner, immediate)
		}
	}

	if newRef != nil {
		// With a ControllerRef present, only the referenced job matters.
		if owner := jm.resolveControllerRef(curPod.Namespace, newRef); owner != nil {
			jm.enqueueController(owner, immediate)
		}
		return
	}

	// An orphan: only bother matching jobs when something relevant changed,
	// so they get a chance to adopt it now.
	if !refChanged && reflect.DeepEqual(curPod.Labels, oldPod.Labels) {
		return
	}
	for _, candidate := range jm.getPodJobs(curPod) {
		jm.enqueueController(candidate, immediate)
	}
}
// deletePod handles pod deletion: it enqueues the job that controls the pod and
// records the deletion against that job's expectations.
// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item.
func (jm *JobController) deletePod(obj interface{}) {
	pod, isPod := obj.(*v1.Pod)
	if !isPod {
		// A dropped delete is noticed on relist as a pod present in the store
		// but missing from the list; a tombstone holding the deleted key/value
		// is delivered instead. Its value may be stale: if the pod changed
		// labels, the new job is not woken up until the periodic resync.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
			return
		}
		if pod, isPod = tombstone.Obj.(*v1.Pod); !isPod {
			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj))
			return
		}
	}

	ownerRef := metav1.GetControllerOf(pod)
	if ownerRef == nil {
		// No controller should care about orphans being deleted.
		return
	}

	owner := jm.resolveControllerRef(pod.Namespace, ownerRef)
	if owner == nil {
		return
	}
	ownerKey, err := controller.KeyFunc(owner)
	if err != nil {
		return
	}
	jm.expectations.DeletionObserved(ownerKey)
	jm.enqueueController(owner, true)
}
// updateJob handles job updates. It enqueues the current job immediately and,
// when the job has already started and its ActiveDeadlineSeconds was set or
// changed, additionally schedules a delayed sync for the moment the deadline
// is due. old and cur must be *batch.Job types. A job whose key cannot be
// computed is silently skipped.
func (jm *JobController) updateJob(old, cur interface{}) {
	oldJob := old.(*batch.Job)
	curJob := cur.(*batch.Job)

	// The handler has no way to report an error; without a key there is
	// nothing to enqueue.
	key, err := controller.KeyFunc(curJob)
	if err != nil {
		return
	}
	jm.enqueueController(curJob, true)

	// A deadline re-sync is only needed for a started job that has a deadline.
	if curJob.Status.StartTime == nil {
		return
	}
	curADS := curJob.Spec.ActiveDeadlineSeconds
	if curADS == nil {
		return
	}
	// ...and only when the deadline is new or has changed.
	oldADS := oldJob.Spec.ActiveDeadlineSeconds
	if oldADS != nil && *oldADS == *curADS {
		return
	}

	now := metav1.Now()
	start := curJob.Status.StartTime.Time
	passed := now.Time.Sub(start)
	total := time.Duration(*curADS) * time.Second
	remaining := total - passed

	// AddAfter will handle total < passed
	jm.queue.AddAfter(key, remaining)
	// remaining is a time.Duration (nanoseconds); convert it to whole seconds
	// so the value matches the "seconds" unit in the message.
	klog.V(4).Infof("job ActiveDeadlineSeconds updated, will rsync after %d seconds", int64(remaining/time.Second))
}
// enqueueController adds the key of obj to the work queue.
// obj could be an *batch.Job, or a DeletionFinalStateUnknown marker item.
// When immediate is true the key is added with no delay; otherwise it is added
// after the delay that getBackoff computes for the key.
func (jm *JobController) enqueueController(obj interface{}, immediate bool) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}

	var delay time.Duration
	if !immediate {
		delay = getBackoff(jm.queue, key)
	}

	// TODO: Handle overlapping controllers better. Either disallow them at admission time or
	// deterministically avoid syncing controllers that fight over pods. Currently, we only
	// ensure that the same controller is synced for a given pod. When we periodically relist
	// all controllers there will still be some replica instability. One way to handle this is
	// by querying the store for all controllers that this rc overlaps, as well as all
	// controllers that overlap this rc, and sorting them.
	jm.queue.AddAfter(key, delay)
}
// worker is the body of a worker goroutine: it keeps processing queue items,
// one at a time, until processNextWorkItem reports that the queue has shut down.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (jm *JobController) worker() {
	for {
		if !jm.processNextWorkItem() {
			return
		}
	}
}
// processNextWorkItem takes one key from the queue and runs syncHandler on it.
// It returns false only when the queue reports shutdown. A failed sync is
// reported and the key is re-added rate-limited; a successful sync forgets the
// key's rate-limit history when the handler asks for it.
func (jm *JobController) processNextWorkItem() bool {
	key, shutdown := jm.queue.Get()
	if shutdown {
		return false
	}
	defer jm.queue.Done(key)

	forget, err := jm.syncHandler(key.(string))
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Error syncing job: %v", err))
		jm.queue.AddRateLimited(key)
		return true
	}

	if forget {
		jm.queue.Forget(key)
	}
	return true
}
// getPodsForJob returns the set of pods that Job j should manage, as decided
// by a pod controller-ref manager's ClaimPods, which also reconciles
// ControllerRef by adopting/orphaning.
// Note that the returned Pods are pointers into the cache.
func (jm *JobController) getPodsForJob(j *batch.Job) ([]*v1.Pod, error) {
	selector, err := metav1.LabelSelectorAsSelector(j.Spec.Selector)
	if err != nil {
		return nil, fmt.Errorf("couldn't convert Job selector: %v", err)
	}

	// Every pod in the namespace is listed, so that pods which no longer match
	// the selector but still carry a ControllerRef to this job are included.
	allPods, err := jm.podStore.Pods(j.Namespace).List(labels.Everything())
	if err != nil {
		return nil, err
	}

	// Before any adoption, recheck for deletion with an uncached quorum read
	// made sometime after listing Pods (see #42639).
	fetchFresh := func() (metav1.Object, error) {
		latest, getErr := jm.kubeClient.BatchV1().Jobs(j.Namespace).Get(j.Name, metav1.GetOptions{})
		if getErr != nil {
			return nil, getErr
		}
		if latest.UID != j.UID {
			return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", j.Namespace, j.Name, latest.UID, j.UID)
		}
		return latest, nil
	}
	canAdopt := controller.RecheckDeletionTimestamp(fetchFresh)

	refManager := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdopt)
	return refManager.ClaimPods(allPods)
}
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
//
// The boolean result ("forget") tells the caller whether the key's requeue
// history may be cleared; processNextWorkItem only acts on it when the
// returned error is nil. A non-nil error causes the key to be re-enqueued
// with rate limiting.
func (jm *JobController) syncJob(key string) (bool, error) {
	startTime := time.Now()
	defer func() {
		klog.V(4).Infof("Finished syncing job %q (%v)", key, time.Since(startTime))
	}()

	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return false, err
	}
	if len(ns) == 0 || len(name) == 0 {
		return false, fmt.Errorf("invalid job key %q: either namespace or name is missing", key)
	}
	sharedJob, err := jm.jobLister.Jobs(ns).Get(name)
	if err != nil {
		if errors.IsNotFound(err) {
			klog.V(4).Infof("Job has been deleted: %v", key)
			jm.expectations.DeleteExpectations(key)
			return true, nil
		}
		return false, err
	}
	// Work on a deep copy: sharedJob comes from the lister's cache, and a
	// plain struct copy would still share the Conditions backing array and
	// every pointer/map field with the cached object while this function
	// appends conditions and rewrites status fields.
	job := *sharedJob.DeepCopy()

	// if job was finished previously, we don't want to redo the termination
	if IsJobFinished(&job) {
		return true, nil
	}

	// retrieve the previous number of retry
	previousRetry := jm.queue.NumRequeues(key)

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobNeedsSync := jm.expectations.SatisfiedExpectations(key)

	pods, err := jm.getPodsForJob(&job)
	if err != nil {
		return false, err
	}

	activePods := controller.FilterActivePods(pods)
	active := int32(len(activePods))
	succeeded, failed := getStatus(pods)
	// Remember how many conditions existed so a newly appended one is
	// detected as a status change further down.
	conditions := len(job.Status.Conditions)
	// job first start
	if job.Status.StartTime == nil {
		now := metav1.Now()
		job.Status.StartTime = &now
		// enqueue a sync to check if job past ActiveDeadlineSeconds
		if job.Spec.ActiveDeadlineSeconds != nil {
			klog.V(4).Infof("Job %s have ActiveDeadlineSeconds will sync after %d seconds",
				key, *job.Spec.ActiveDeadlineSeconds)
			jm.queue.AddAfter(key, time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second)
		}
	}

	var manageJobErr error
	jobFailed := false
	var failureReason string
	var failureMessage string

	jobHaveNewFailure := failed > job.Status.Failed
	// new failures happen when status does not reflect the failures and active
	// is different than parallelism, otherwise the previous controller loop
	// failed updating status so even if we pick up failure it is not a new one
	exceedsBackoffLimit := jobHaveNewFailure && (active != *job.Spec.Parallelism) &&
		(int32(previousRetry)+1 > *job.Spec.BackoffLimit)

	if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
		// check if the number of pod restart exceeds backoff (for restart OnFailure only)
		// OR if the number of failed jobs increased since the last syncJob
		jobFailed = true
		failureReason = "BackoffLimitExceeded"
		failureMessage = "Job has reached the specified backoff limit"
	} else if pastActiveDeadline(&job) {
		jobFailed = true
		failureReason = "DeadlineExceeded"
		failureMessage = "Job was active longer than specified deadline"
	}

	if jobFailed {
		// One buffer slot per active pod, so every failed deletion can report
		// its error without blocking.
		errCh := make(chan error, active)
		jm.deleteJobPods(&job, activePods, errCh)
		// Non-blocking read: pick up one deletion error if any was reported.
		select {
		case manageJobErr = <-errCh:
			if manageJobErr != nil {
				break
			}
		default:
		}

		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, failureReason, failureMessage))
		jm.recorder.Event(&job, v1.EventTypeWarning, failureReason, failureMessage)
	} else {
		if jobNeedsSync && job.DeletionTimestamp == nil {
			active, manageJobErr = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		complete := false
		if job.Spec.Completions == nil {
			// This type of job is complete when any pod exits with success.
			// Each pod is capable of
			// determining whether or not the entire Job is done. Subsequent pods are
			// not expected to fail, but if they do, the failure is ignored. Once any
			// pod succeeds, the controller waits for remaining pods to finish, and
			// then the job is complete.
			if succeeded > 0 && active == 0 {
				complete = true
			}
		} else {
			// Job specifies a number of completions. This type of job signals
			// success by having that number of successes. Since we do not
			// start more pods than there are remaining completions, there should
			// not be any remaining active pods once this count is reached.
			if completions >= *job.Spec.Completions {
				complete = true
				if active > 0 {
					jm.recorder.Event(&job, v1.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached")
				}
				if completions > *job.Spec.Completions {
					jm.recorder.Event(&job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
				}
			}
		}
		if complete {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
			now := metav1.Now()
			job.Status.CompletionTime = &now
		}
	}

	forget := false
	// Check if the number of jobs succeeded increased since the last check. If yes "forget" should be true
	// This logic is linked to the issue: https://github.com/kubernetes/kubernetes/issues/56853 that aims to
	// improve the Job backoff policy when parallelism > 1 and few Jobs failed but others succeed.
	// In this case, we should clear the backoff delay.
	if job.Status.Succeeded < succeeded {
		forget = true
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			return forget, err
		}

		if jobHaveNewFailure && !IsJobFinished(&job) {
			// returning an error will re-enqueue Job after the backoff period
			return forget, fmt.Errorf("failed pod(s) detected for job key %q", key)
		}

		forget = true
	}

	return forget, manageJobErr
}
// deleteJobPods deletes the given pods concurrently, one goroutine per pod,
// and returns once every deletion attempt has finished. Each failed deletion
// is reported through utilruntime.HandleError and sent on errCh. Because the
// function waits for all goroutines before returning, errCh must have room
// for the errors (or be read concurrently) so the sends do not block forever.
func (jm *JobController) deleteJobPods(job *batch.Job, pods []*v1.Pod, errCh chan<- error) {
	// TODO: below code should be replaced with pod termination resulting in
	// pod failures, rather than killing pods. Unfortunately none such solution
	// exists ATM. There's an open discussion in the topic in
	// https://github.com/kubernetes/kubernetes/issues/14602 which might give
	// some sort of solution to above problem.
	// kill remaining active pods
	var pending sync.WaitGroup
	pending.Add(len(pods))
	for idx := range pods {
		go func(pod *v1.Pod) {
			defer pending.Done()
			deleteErr := jm.podControl.DeletePod(job.Namespace, pod.Name, job)
			if deleteErr == nil {
				return
			}
			defer utilruntime.HandleError(deleteErr)
			klog.V(2).Infof("Failed to delete %v, job %q/%q deadline exceeded", pod.Name, job.Namespace, job.Name)
			errCh <- deleteErr
		}(pods[idx])
	}
	pending.Wait()
}
// pastBackoffLimitOnFailure reports whether the summed restart counts of the
// init containers and containers of all Running pods have reached the job's
// BackoffLimit. With a BackoffLimit of 0, any restart at all counts as past
// the limit. It only applies to jobs whose pod template uses
// restartPolicy == OnFailure; for any other policy it returns false.
func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
		return false
	}

	var restarts int32
	for _, pod := range pods {
		// Only pods that are currently running contribute to the total.
		if pod.Status.Phase != v1.PodRunning {
			continue
		}
		for k := range pod.Status.InitContainerStatuses {
			restarts += pod.Status.InitContainerStatuses[k].RestartCount
		}
		for k := range pod.Status.ContainerStatuses {
			restarts += pod.Status.ContainerStatuses[k].RestartCount
		}
	}

	limit := *job.Spec.BackoffLimit
	if limit == 0 {
		return restarts > 0
	}
	return restarts >= limit
}
// pastActiveDeadline reports whether the time elapsed since the job's
// recorded StartTime is at least ActiveDeadlineSeconds. It returns false when
// either the deadline or the start time is unset.
func pastActiveDeadline(job *batch.Job) bool {
	deadlineSeconds, startedAt := job.Spec.ActiveDeadlineSeconds, job.Status.StartTime
	if deadlineSeconds == nil || startedAt == nil {
		return false
	}
	elapsed := metav1.Now().Time.Sub(startedAt.Time)
	limit := time.Duration(*deadlineSeconds) * time.Second
	return elapsed >= limit
}
// newCondition builds a JobCondition of the given type with status
// ConditionTrue and the supplied reason and message. LastProbeTime and
// LastTransitionTime are both set from a single clock read so the two
// timestamps of one condition never disagree.
func newCondition(conditionType batch.JobConditionType, reason, message string) batch.JobCondition {
	now := metav1.Now()
	return batch.JobCondition{
		Type:               conditionType,
		Status:             v1.ConditionTrue,
		LastProbeTime:      now,
		LastTransitionTime: now,
		Reason:             reason,
		Message:            message,
	}
}
// getStatus returns how many of the given pods are in the Succeeded phase and
// how many are in the Failed phase.
func getStatus(pods []*v1.Pod) (succeeded, failed int32) {
	succeededCount := filterPods(pods, v1.PodSucceeded)
	failedCount := filterPods(pods, v1.PodFailed)
	return int32(succeededCount), int32(failedCount)
}
// manageJob is the core method responsible for managing the number of running
// pods according to what is specified in the job.Spec.
// Does NOT modify <activePods>.
//
// When there are more active pods than job.Spec.Parallelism it deletes the
// surplus; when there are fewer it creates pods in growing batches. In both
// directions it first records the expected deletions/creations and then
// corrects those expectations for every operation that fails or is skipped.
//
// It returns the number of pods expected to be active after these operations
// and one of the errors reported by the delete/create attempts (nil if none).
// If the job key cannot be computed it reports the problem through
// utilruntime.HandleError and returns (0, nil).
func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *batch.Job) (int32, error) {
// activeLock guards `active`, which the worker goroutines below adjust when an operation fails.
var activeLock sync.Mutex
active := int32(len(activePods))
parallelism := *job.Spec.Parallelism
jobKey, err := controller.KeyFunc(job)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Couldn't get key for job %#v: %v", job, err))
return 0, nil
}
// errCh stays nil when active == parallelism; the final select handles that via its default case.
var errCh chan error
if active > parallelism {
diff := active - parallelism
// Buffered with one slot per deletion so failing goroutines never block on send.
errCh = make(chan error, diff)
jm.expectations.ExpectDeletions(jobKey, int(diff))
klog.V(4).Infof("Too many pods running job %q, need %d, deleting %d", jobKey, parallelism, diff)
// Sort the pods in the order such that not-ready < ready, unscheduled
// < scheduled, and pending < running. This ensures that we delete pods
// in the earlier stages whenever possible.
sort.Sort(controller.ActivePods(activePods))
// Assume every deletion succeeds; failures add the pod back below.
active -= diff
wait := sync.WaitGroup{}
wait.Add(int(diff))
for i := int32(0); i < diff; i++ {
go func(ix int32) {
defer wait.Done()
if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, job); err != nil {
defer utilruntime.HandleError(err)
// Decrement the expected number of deletes because the informer won't observe this deletion
klog.V(2).Infof("Failed to delete %v, decrementing expectations for job %q/%q", activePods[ix].Name, job.Namespace, job.Name)
jm.expectations.DeletionObserved(jobKey)
activeLock.Lock()
active++
activeLock.Unlock()
errCh <- err
}
}(i)
}
wait.Wait()
} else if active < parallelism {
wantActive := int32(0)
if job.Spec.Completions == nil {
// Job does not specify a number of completions. Therefore, number active
// should be equal to parallelism, unless the job has seen at least
// once success, in which leave whatever is running, running.
if succeeded > 0 {
wantActive = active
} else {
wantActive = parallelism
}
} else {
// Job specifies a specific number of completions. Therefore, number
// active should not ever exceed number of remaining completions.
wantActive = *job.Spec.Completions - succeeded
if wantActive > parallelism {
wantActive = parallelism
}
}
diff := wantActive - active
if diff < 0 {
utilruntime.HandleError(fmt.Errorf("More active than wanted: job %q, want %d, have %d", jobKey, wantActive, active))
diff = 0
}
jm.expectations.ExpectCreations(jobKey, int(diff))
// Buffered with one slot per planned creation so failing goroutines never block on send.
errCh = make(chan error, diff)
klog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, wantActive, diff)
// Assume every creation succeeds; failed or skipped creations are subtracted below.
active += diff
wait := sync.WaitGroup{}
// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
// and double with each successful iteration in a kind of "slow start".
// This handles attempts to start large numbers of pods that would
// likely all fail with the same error. For example a project with a
// low quota that attempts to create a large number of pods will be
// prevented from spamming the API service with the pod create requests
// after one of its pods fails. Conveniently, this also prevents the
// event spam that those failures would generate.
for batchSize := int32(integer.IntMin(int(diff), controller.SlowStartInitialBatchSize)); diff > 0; batchSize = integer.Int32Min(2*batchSize, diff) {
// Snapshot the error count so new failures in this batch can be detected after it finishes.
errorCount := len(errCh)
wait.Add(int(batchSize))
for i := int32(0); i < batchSize; i++ {
go func() {
defer wait.Done()
err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
if err != nil && errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
}
if err != nil {
defer utilruntime.HandleError(err)
// Decrement the expected number of creates because the informer won't observe this pod
klog.V(2).Infof("Failed creation, decrementing expectations for job %q/%q", job.Namespace, job.Name)
jm.expectations.CreationObserved(jobKey)
activeLock.Lock()
active--
activeLock.Unlock()
errCh <- err
}
}()
}
wait.Wait()
// any skipped pods that we never attempted to start shouldn't be expected.
skippedPods := diff - batchSize
if errorCount < len(errCh) && skippedPods > 0 {
klog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for job %q/%q", skippedPods, job.Namespace, job.Name)
active -= skippedPods
for i := int32(0); i < skippedPods; i++ {
// Decrement the expected number of creates because the informer won't observe this pod
jm.expectations.CreationObserved(jobKey)
}
// The skipped pods will be retried later. The next controller resync will
// retry the slow start process.
break
}
diff -= batchSize
}
}
// Non-blocking read: surface one reported error if there is any, otherwise fall through.
select {
case err := <-errCh:
// all errors have been reported before, we only need to inform the controller that there was an error and it should re-try this job once more next time.
if err != nil {
return active, err
}
default:
}
return active, nil
}
// updateJobStatus writes job.Status to the API server. Each attempt fetches
// the current Job object, overwrites its Status with job.Status and calls
// UpdateStatus; up to statusUpdateRetries+1 attempts are made. A failed fetch
// aborts immediately with that error; otherwise the error of the last
// UpdateStatus attempt is returned (nil on success).
func (jm *JobController) updateJobStatus(job *batch.Job) error {
	jobClient := jm.kubeClient.BatchV1().Jobs(job.Namespace)
	var err error
	for attempt := 0; attempt <= statusUpdateRetries; attempt++ {
		latest, getErr := jobClient.Get(job.Name, metav1.GetOptions{})
		if getErr != nil {
			return getErr
		}
		latest.Status = job.Status
		if _, err = jobClient.UpdateStatus(latest); err == nil {
			return nil
		}
	}
	return err
}
// getBackoff computes the backoff delay for key from the number of times the
// queue has requeued it: DefaultJobBackOff doubled for every requeue after
// the first, capped at MaxJobBackOff. A key with no requeues gets a zero
// delay.
func getBackoff(queue workqueue.RateLimitingInterface, key interface{}) time.Duration {
	requeues := queue.NumRequeues(key)
	if requeues <= 0 {
		return time.Duration(0)
	}

	// Do the arithmetic in float64 and clamp before converting, so the
	// conversion to time.Duration can never overflow.
	nanos := float64(DefaultJobBackOff.Nanoseconds()) * math.Pow(2, float64(requeues-1))
	if nanos > math.MaxInt64 {
		return MaxJobBackOff
	}
	if delay := time.Duration(nanos); delay <= MaxJobBackOff {
		return delay
	}
	return MaxJobBackOff
}
// filterPods returns the number of pods whose status phase equals phase.
func filterPods(pods []*v1.Pod, phase v1.PodPhase) int {
	matches := 0
	for _, pod := range pods {
		if pod.Status.Phase == phase {
			matches++
		}
	}
	return matches
}

View File

@ -1,31 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package job
import (
batch "k8s.io/api/batch/v1"
"k8s.io/api/core/v1"
)
// IsJobFinished reports whether the job carries a JobComplete or JobFailed
// condition whose status is ConditionTrue.
func IsJobFinished(j *batch.Job) bool {
	for i := range j.Status.Conditions {
		cond := &j.Status.Conditions[i]
		if cond.Status != v1.ConditionTrue {
			continue
		}
		switch cond.Type {
		case batch.JobComplete, batch.JobFailed:
			return true
		}
	}
	return false
}

View File

@ -23,8 +23,6 @@ package nodelifecycle
import (
"fmt"
"hash/fnv"
"io"
"sync"
"time"
@ -625,8 +623,10 @@ func (nc *Controller) doEvictionPass() {
}
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown". It also evicts all pods if node
// is not ready or not reachable for a long period of time.
// if not, post "NodeReady==ConditionUnknown".
// For nodes who are not ready or not reachable for a long period of time.
// This function will taint them if TaintBasedEvictions feature was enabled.
// Otherwise, it would evict it directly.
func (nc *Controller) monitorNodeHealth() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
@ -862,7 +862,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
transitionTime = savedNodeHealth.readyTransitionTimestamp
}
if klog.V(5) {
klog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status)
klog.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status)
} else {
klog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
}
@ -1276,9 +1276,3 @@ func (nc *Controller) reconcileNodeLabels(nodeName string) error {
}
return nil
}
// hash reduces val to an int by computing its 32-bit FNV-1a checksum and
// taking the remainder of dividing it by max.
func hash(val string, max int) int {
	digest := fnv.New32a()
	io.WriteString(digest, val)
	sum := int(digest.Sum32())
	return sum % max
}

View File

@ -0,0 +1,59 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"encoding/json"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
)
// patch sends a strategic merge patch for the service to the "status"
// subresource, built from the difference between oldSvc and newSvc. Only
// Status and ObjectMeta differences are sent: newSvc.Spec is overwritten with
// oldSvc.Spec first, so any spec change is ignored. Note that this modifies
// the newSvc object passed in by the caller.
func patch(c v1core.CoreV1Interface, oldSvc *v1.Service, newSvc *v1.Service) (*v1.Service, error) {
	// Make the specs identical so the computed patch cannot contain spec changes.
	newSvc.Spec = oldSvc.Spec

	payload, err := getPatchBytes(oldSvc, newSvc)
	if err != nil {
		return nil, err
	}

	svcClient := c.Services(oldSvc.Namespace)
	return svcClient.Patch(oldSvc.Name, types.StrategicMergePatchType, payload, "status")
}
// getPatchBytes serializes both services to JSON and returns the two-way
// strategic merge patch that turns oldSvc into newSvc. Any marshalling or
// patch-creation failure is returned wrapped with the service's
// namespace/name.
func getPatchBytes(oldSvc *v1.Service, newSvc *v1.Service) ([]byte, error) {
	before, err := json.Marshal(oldSvc)
	if err != nil {
		return nil, fmt.Errorf("failed to Marshal oldData for svc %s/%s: %v", oldSvc.Namespace, oldSvc.Name, err)
	}

	after, err := json.Marshal(newSvc)
	if err != nil {
		return nil, fmt.Errorf("failed to Marshal newData for svc %s/%s: %v", newSvc.Namespace, newSvc.Name, err)
	}

	mergePatch, err := strategicpatch.CreateTwoWayMergePatch(before, after, v1.Service{})
	if err != nil {
		return nil, fmt.Errorf("failed to CreateTwoWayMergePatch for svc %s/%s: %v", oldSvc.Namespace, oldSvc.Name, err)
	}
	return mergePatch, nil
}

View File

@ -24,7 +24,7 @@ import (
"reflect"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
@ -39,11 +39,13 @@ import (
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
cloudprovider "k8s.io/cloud-provider"
servicehelper "k8s.io/cloud-provider/service/helpers"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/util/metrics"
"k8s.io/kubernetes/pkg/util/slice"
)
const (
@ -135,15 +137,28 @@ func New(
serviceInformer.Informer().AddEventHandlerWithResyncPeriod(
cache.ResourceEventHandlerFuncs{
AddFunc: s.enqueueService,
UpdateFunc: func(old, cur interface{}) {
oldSvc, ok1 := old.(*v1.Service)
curSvc, ok2 := cur.(*v1.Service)
if ok1 && ok2 && s.needsUpdate(oldSvc, curSvc) {
AddFunc: func(cur interface{}) {
svc, ok := cur.(*v1.Service)
if ok && (wantsLoadBalancer(svc) || needsCleanup(svc)) {
s.enqueueService(cur)
}
},
DeleteFunc: s.enqueueService,
UpdateFunc: func(old, cur interface{}) {
oldSvc, ok1 := old.(*v1.Service)
curSvc, ok2 := cur.(*v1.Service)
if ok1 && ok2 && (s.needsUpdate(oldSvc, curSvc) || needsCleanup(curSvc)) {
s.enqueueService(cur)
}
},
DeleteFunc: func(old interface{}) {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.ServiceLoadBalancerFinalizer) {
// No need to handle deletion event if finalizer feature gate is
// enabled. Because the deletion would be handled by the update
// path when the deletion timestamp is added.
return
}
s.enqueueService(old)
},
},
serviceSyncPeriod,
)
@ -160,7 +175,7 @@ func New(
func (s *ServiceController) enqueueService(obj interface{}) {
key, err := controller.KeyFunc(obj)
if err != nil {
klog.Errorf("Couldn't get key for object %#v: %v", obj, err)
runtime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", obj, err))
return
}
s.queue.Add(key)
@ -235,129 +250,112 @@ func (s *ServiceController) init() error {
return nil
}
// processServiceUpdate operates loadbalancers for the incoming service accordingly.
// processServiceCreateOrUpdate operates loadbalancers for the incoming service accordingly.
// Returns an error if processing the service update failed.
func (s *ServiceController) processServiceUpdate(cachedService *cachedService, service *v1.Service, key string) error {
if cachedService.state != nil {
if cachedService.state.UID != service.UID {
err := s.processLoadBalancerDelete(cachedService, key)
if err != nil {
return err
}
func (s *ServiceController) processServiceCreateOrUpdate(service *v1.Service, key string) error {
// TODO(@MrHohn): Remove the cache once we get rid of the non-finalizer deletion
// path. Ref https://github.com/kubernetes/enhancements/issues/980.
cachedService := s.cache.getOrCreate(key)
if cachedService.state != nil && cachedService.state.UID != service.UID {
// This happens only when a service is deleted and re-created
// in a short period, which is only possible when it doesn't
// contain finalizer.
if err := s.processLoadBalancerDelete(cachedService.state, key); err != nil {
return err
}
}
// cache the service, we need the info for service deletion
// Always cache the service, we need the info for service deletion in case
// when load balancer cleanup is not handled via finalizer.
cachedService.state = service
err := s.createLoadBalancerIfNeeded(key, service)
op, err := s.syncLoadBalancerIfNeeded(service, key)
if err != nil {
eventType := "CreatingLoadBalancerFailed"
message := "Error creating load balancer (will retry): "
if !wantsLoadBalancer(service) {
eventType = "CleanupLoadBalancerFailed"
message = "Error cleaning up load balancer (will retry): "
}
message += err.Error()
s.eventRecorder.Event(service, v1.EventTypeWarning, eventType, message)
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "SyncLoadBalancerFailed", "Error syncing load balancer: %v", err)
return err
}
// Always update the cache upon success.
// NOTE: Since we update the cached service if and only if we successfully
// processed it, a cached service being nil implies that it hasn't yet
// been successfully processed.
s.cache.set(key, cachedService)
if op == deleteLoadBalancer {
// Only delete the cache upon successful load balancer deletion.
s.cache.delete(key)
}
return nil
}
// createLoadBalancerIfNeeded ensures that service's status is synced up with loadbalancer
type loadBalancerOperation int
const (
deleteLoadBalancer loadBalancerOperation = iota
ensureLoadBalancer
)
// syncLoadBalancerIfNeeded ensures that service's status is synced up with loadbalancer
// i.e. creates loadbalancer for service if requested and deletes loadbalancer if the service
// doesn't want a loadbalancer no more. Returns whatever error occurred.
func (s *ServiceController) createLoadBalancerIfNeeded(key string, service *v1.Service) error {
func (s *ServiceController) syncLoadBalancerIfNeeded(service *v1.Service, key string) (loadBalancerOperation, error) {
// Note: It is safe to just call EnsureLoadBalancer. But, on some clouds that requires a delete & create,
// which may involve service interruption. Also, we would like user-friendly events.
// Save the state so we can avoid a write if it doesn't change
previousState := v1helper.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
var newState *v1.LoadBalancerStatus
previousStatus := v1helper.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
var newStatus *v1.LoadBalancerStatus
var op loadBalancerOperation
var err error
if !wantsLoadBalancer(service) {
if !wantsLoadBalancer(service) || needsCleanup(service) {
// Delete the load balancer if service no longer wants one, or if service needs cleanup.
op = deleteLoadBalancer
newStatus = &v1.LoadBalancerStatus{}
_, exists, err := s.balancer.GetLoadBalancer(context.TODO(), s.clusterName, service)
if err != nil {
return fmt.Errorf("error getting LB for service %s: %v", key, err)
return op, fmt.Errorf("failed to check if load balancer exists before cleanup: %v", err)
}
if exists {
klog.Infof("Deleting existing load balancer for service %s that no longer needs a load balancer.", key)
klog.V(2).Infof("Deleting existing load balancer for service %s", key)
s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletingLoadBalancer", "Deleting load balancer")
if err := s.balancer.EnsureLoadBalancerDeleted(context.TODO(), s.clusterName, service); err != nil {
return err
return op, fmt.Errorf("failed to delete load balancer: %v", err)
}
s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted load balancer")
}
newState = &v1.LoadBalancerStatus{}
// Always try to remove finalizer when load balancer is deleted.
// It will be a no-op if finalizer does not exist.
// Note this also clears up finalizer if the cluster is downgraded
// from a version that attaches finalizer to a version that doesn't.
if err := s.removeFinalizer(service); err != nil {
return op, fmt.Errorf("failed to remove load balancer cleanup finalizer: %v", err)
}
s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted load balancer")
} else {
klog.V(2).Infof("Ensuring LB for service %s", key)
// TODO: We could do a dry-run here if wanted to avoid the spurious cloud-calls & events when we restart
// Create or update the load balancer if service wants one.
op = ensureLoadBalancer
klog.V(2).Infof("Ensuring load balancer for service %s", key)
s.eventRecorder.Event(service, v1.EventTypeNormal, "EnsuringLoadBalancer", "Ensuring load balancer")
newState, err = s.ensureLoadBalancer(service)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.ServiceLoadBalancerFinalizer) {
// Always try to add finalizer prior to load balancer creation.
// It will be a no-op if finalizer already exists.
// Note this also retrospectively puts on finalizer if the cluster
// is upgraded from a version that doesn't attach finalizer to a
// version that does.
if err := s.addFinalizer(service); err != nil {
return op, fmt.Errorf("failed to add load balancer cleanup finalizer: %v", err)
}
}
newStatus, err = s.ensureLoadBalancer(service)
if err != nil {
return fmt.Errorf("failed to ensure load balancer for service %s: %v", key, err)
return op, fmt.Errorf("failed to ensure load balancer: %v", err)
}
s.eventRecorder.Event(service, v1.EventTypeNormal, "EnsuredLoadBalancer", "Ensured load balancer")
}
// Write the state if changed
// TODO: Be careful here ... what if there were other changes to the service?
if !v1helper.LoadBalancerStatusEqual(previousState, newState) {
// Make a copy so we don't mutate the shared informer cache
service = service.DeepCopy()
// Update the status on the copy
service.Status.LoadBalancer = *newState
if err := s.persistUpdate(service); err != nil {
// TODO: This logic needs to be revisited. We might want to retry on all the errors, not just conflicts.
if errors.IsConflict(err) {
return fmt.Errorf("not persisting update to service '%s/%s' that has been changed since we received it: %v", service.Namespace, service.Name, err)
}
runtime.HandleError(fmt.Errorf("failed to persist service %q updated status to apiserver, even after retries. Giving up: %v", key, err))
return nil
if err := s.patchStatus(service, previousStatus, newStatus); err != nil {
// Only retry error that isn't not found:
// - Not found error mostly happens when service disappears right after
// we remove the finalizer.
// - We can't patch status on non-exist service anyway.
if !errors.IsNotFound(err) {
return op, fmt.Errorf("failed to update load balancer status: %v", err)
}
} else {
klog.V(2).Infof("Not persisting unchanged LoadBalancerStatus for service %s to registry.", key)
}
return nil
}
func (s *ServiceController) persistUpdate(service *v1.Service) error {
var err error
for i := 0; i < clientRetryCount; i++ {
_, err = s.kubeClient.CoreV1().Services(service.Namespace).UpdateStatus(service)
if err == nil {
return nil
}
// If the object no longer exists, we don't want to recreate it. Just bail
// out so that we can process the delete, which we should soon be receiving
// if we haven't already.
if errors.IsNotFound(err) {
klog.Infof("Not persisting update to service '%s/%s' that no longer exists: %v",
service.Namespace, service.Name, err)
return nil
}
// TODO: Try to resolve the conflict if the change was unrelated to load
// balancer status. For now, just pass it up the stack.
if errors.IsConflict(err) {
return err
}
klog.Warningf("Failed to persist updated LoadBalancerStatus to service '%s/%s' after creating its load balancer: %v",
service.Namespace, service.Name, err)
time.Sleep(clientRetryInterval)
}
return err
return op, nil
}
func (s *ServiceController) ensureLoadBalancer(service *v1.Service) (*v1.LoadBalancerStatus, error) {
@ -368,7 +366,7 @@ func (s *ServiceController) ensureLoadBalancer(service *v1.Service) (*v1.LoadBal
// If there are no available nodes for LoadBalancer service, make a EventTypeWarning event for it.
if len(nodes) == 0 {
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer service %s/%s", service.Namespace, service.Name)
s.eventRecorder.Event(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer")
}
// - Only one protocol supported per service
@ -441,6 +439,12 @@ func (s *serviceCache) delete(serviceName string) {
delete(s.serviceMap, serviceName)
}
// needsCleanup reports whether the service's load balancer must be cleaned
// up: the service has a deletion timestamp set and still carries the load
// balancer finalizer.
func needsCleanup(service *v1.Service) bool {
	if service.ObjectMeta.DeletionTimestamp == nil {
		return false
	}
	return servicehelper.HasLBFinalizer(service)
}
// needsUpdate checks if load balancer needs to be updated due to change in attributes.
func (s *ServiceController) needsUpdate(oldService *v1.Service, newService *v1.Service) bool {
if !wantsLoadBalancer(oldService) && !wantsLoadBalancer(newService) {
return false
@ -626,7 +630,7 @@ func getNodeConditionPredicate() corelisters.NodeConditionPredicate {
func (s *ServiceController) nodeSyncLoop() {
newHosts, err := s.nodeLister.ListWithPredicate(getNodeConditionPredicate())
if err != nil {
klog.Errorf("Failed to retrieve current set of nodes from node lister: %v", err)
runtime.HandleError(fmt.Errorf("Failed to retrieve current set of nodes from node lister: %v", err))
return
}
if nodeSlicesEqualForLB(newHosts, s.knownHosts) {
@ -636,7 +640,7 @@ func (s *ServiceController) nodeSyncLoop() {
return
}
klog.Infof("Detected change in list of current cluster nodes. New node set: %v",
klog.V(2).Infof("Detected change in list of current cluster nodes. New node set: %v",
nodeNames(newHosts))
// Try updating all services, and save the ones that fail to try again next
@ -644,7 +648,7 @@ func (s *ServiceController) nodeSyncLoop() {
s.servicesToUpdate = s.cache.allServices()
numServices := len(s.servicesToUpdate)
s.servicesToUpdate = s.updateLoadBalancerHosts(s.servicesToUpdate, newHosts)
klog.Infof("Successfully updated %d out of %d load balancers to direct traffic to the updated set of nodes",
klog.V(2).Infof("Successfully updated %d out of %d load balancers to direct traffic to the updated set of nodes",
numServices-len(s.servicesToUpdate), numServices)
s.knownHosts = newHosts
@ -660,7 +664,7 @@ func (s *ServiceController) updateLoadBalancerHosts(services []*v1.Service, host
return
}
if err := s.lockedUpdateLoadBalancerHosts(service, hosts); err != nil {
klog.Errorf("External error while updating load balancer: %v.", err)
runtime.HandleError(fmt.Errorf("failed to update load balancer hosts for service %s/%s: %v", service.Namespace, service.Name, err))
servicesToRetry = append(servicesToRetry, service)
}
}()
@ -680,7 +684,7 @@ func (s *ServiceController) lockedUpdateLoadBalancerHosts(service *v1.Service, h
if err == nil {
// If there are no available nodes for LoadBalancer service, make a EventTypeWarning event for it.
if len(hosts) == 0 {
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer service %s/%s", service.Namespace, service.Name)
s.eventRecorder.Event(service, v1.EventTypeWarning, "UnAvailableLoadBalancer", "There are no available nodes for LoadBalancer")
} else {
s.eventRecorder.Event(service, v1.EventTypeNormal, "UpdatedLoadBalancer", "Updated load balancer with new hosts")
}
@ -689,12 +693,12 @@ func (s *ServiceController) lockedUpdateLoadBalancerHosts(service *v1.Service, h
// It's only an actual error if the load balancer still exists.
if _, exists, err := s.balancer.GetLoadBalancer(context.TODO(), s.clusterName, service); err != nil {
klog.Errorf("External error while checking if load balancer %q exists: name, %v", s.balancer.GetLoadBalancerName(context.TODO(), s.clusterName, service), err)
runtime.HandleError(fmt.Errorf("failed to check if load balancer exists for service %s/%s: %v", service.Namespace, service.Name, err))
} else if !exists {
return nil
}
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "LoadBalancerUpdateFailed", "Error updating load balancer with new hosts %v: %v", nodeNames(hosts), err)
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "UpdateLoadBalancerFailed", "Error updating load balancer with new hosts %v: %v", nodeNames(hosts), err)
return err
}
@ -711,7 +715,6 @@ func loadBalancerIPsAreEqual(oldService, newService *v1.Service) bool {
// invoked concurrently with the same key.
func (s *ServiceController) syncService(key string) error {
startTime := time.Now()
var cachedService *cachedService
defer func() {
klog.V(4).Infof("Finished syncing service %q (%v)", key, time.Since(startTime))
}()
@ -726,44 +729,88 @@ func (s *ServiceController) syncService(key string) error {
switch {
case errors.IsNotFound(err):
// service absence in store means watcher caught the deletion, ensure LB info is cleaned
klog.Infof("Service has been deleted %v. Attempting to cleanup load balancer resources", key)
err = s.processServiceDeletion(key)
case err != nil:
klog.Infof("Unable to retrieve service %v from store: %v", key, err)
runtime.HandleError(fmt.Errorf("Unable to retrieve service %v from store: %v", key, err))
default:
cachedService = s.cache.getOrCreate(key)
err = s.processServiceUpdate(cachedService, service, key)
err = s.processServiceCreateOrUpdate(service, key)
}
return err
}
// processServiceDeletion returns an error if processing the service deletion
// failed.
func (s *ServiceController) processServiceDeletion(key string) error {
cachedService, ok := s.cache.get(key)
if !ok {
klog.Errorf("service %s not in cache even though the watcher thought it was. Ignoring the deletion", key)
// Cache does not contains the key means:
// - We didn't create a Load Balancer for the deleted service at all.
// - We already deleted the Load Balancer that was created for the service.
// In both cases we have nothing left to do.
return nil
}
return s.processLoadBalancerDelete(cachedService, key)
klog.V(2).Infof("Service %v has been deleted. Attempting to cleanup load balancer resources", key)
if err := s.processLoadBalancerDelete(cachedService.state, key); err != nil {
return err
}
s.cache.delete(key)
return nil
}
func (s *ServiceController) processLoadBalancerDelete(cachedService *cachedService, key string) error {
service := cachedService.state
func (s *ServiceController) processLoadBalancerDelete(service *v1.Service, key string) error {
// delete load balancer info only if the service type is LoadBalancer
if !wantsLoadBalancer(service) {
return nil
}
s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletingLoadBalancer", "Deleting load balancer")
err := s.balancer.EnsureLoadBalancerDeleted(context.TODO(), s.clusterName, service)
if err != nil {
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "DeletingLoadBalancerFailed", "Error deleting load balancer (will retry): %v", err)
if err := s.balancer.EnsureLoadBalancerDeleted(context.TODO(), s.clusterName, service); err != nil {
s.eventRecorder.Eventf(service, v1.EventTypeWarning, "DeleteLoadBalancerFailed", "Error deleting load balancer: %v", err)
return err
}
s.eventRecorder.Event(service, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted load balancer")
s.cache.delete(key)
return nil
}
// addFinalizer patches the service so that it carries
// servicehelper.LoadBalancerCleanupFinalizer. It does nothing and returns nil
// when the finalizer is already present.
func (s *ServiceController) addFinalizer(service *v1.Service) error {
	if servicehelper.HasLBFinalizer(service) {
		return nil
	}

	// Work on a deep copy so the object in the shared informer cache is never mutated.
	withFinalizer := service.DeepCopy()
	withFinalizer.ObjectMeta.Finalizers = append(withFinalizer.ObjectMeta.Finalizers, servicehelper.LoadBalancerCleanupFinalizer)

	klog.V(2).Infof("Adding finalizer to service %s/%s", withFinalizer.Namespace, withFinalizer.Name)
	if _, err := patch(s.kubeClient.CoreV1(), service, withFinalizer); err != nil {
		return err
	}
	return nil
}
// removeFinalizer patches the service so that it no longer carries
// servicehelper.LoadBalancerCleanupFinalizer. It does nothing and returns nil
// when the finalizer is not present.
func (s *ServiceController) removeFinalizer(service *v1.Service) error {
	if !servicehelper.HasLBFinalizer(service) {
		return nil
	}

	// Work on a deep copy so the object in the shared informer cache is never mutated.
	withoutFinalizer := service.DeepCopy()
	withoutFinalizer.ObjectMeta.Finalizers = slice.RemoveString(withoutFinalizer.ObjectMeta.Finalizers, servicehelper.LoadBalancerCleanupFinalizer, nil)

	klog.V(2).Infof("Removing finalizer from service %s/%s", withoutFinalizer.Namespace, withoutFinalizer.Name)
	if _, err := patch(s.kubeClient.CoreV1(), service, withoutFinalizer); err != nil {
		return err
	}
	return nil
}
// patchStatus patches the service so that its load balancer status becomes
// newStatus. When previousStatus and newStatus are already equal no request
// is made and nil is returned.
func (s *ServiceController) patchStatus(service *v1.Service, previousStatus, newStatus *v1.LoadBalancerStatus) error {
	if v1helper.LoadBalancerStatusEqual(previousStatus, newStatus) {
		return nil
	}

	// Work on a deep copy so the object in the shared informer cache is never mutated.
	withStatus := service.DeepCopy()
	withStatus.Status.LoadBalancer = *newStatus

	klog.V(2).Infof("Patching status for service %s/%s", withStatus.Namespace, withStatus.Name)
	if _, err := patch(s.kubeClient.CoreV1(), service, withStatus); err != nil {
		return err
	}
	return nil
}

View File

@ -32,6 +32,7 @@ import (
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
appsv1listers "k8s.io/client-go/listers/apps/v1"
utilpod "k8s.io/kubernetes/pkg/api/v1/pod"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/kubelet/util/format"
@ -134,9 +135,12 @@ func MarkAllPodsNotReady(kubeClient clientset.Interface, node *v1.Node) error {
continue
}
for i, cond := range pod.Status.Conditions {
for _, cond := range pod.Status.Conditions {
if cond.Type == v1.PodReady {
pod.Status.Conditions[i].Status = v1.ConditionFalse
cond.Status = v1.ConditionFalse
if !utilpod.UpdatePodCondition(&pod.Status, &cond) {
break
}
klog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
_, err := kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(&pod)
if err != nil {

View File

@ -1,34 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package events
// Reason strings for volume-related events. Every constant's value is
// identical to its name.
const (
	// volume relevant event reasons
	FailedBinding             = "FailedBinding"
	VolumeMismatch            = "VolumeMismatch"
	VolumeFailedRecycle       = "VolumeFailedRecycle"
	VolumeRecycled            = "VolumeRecycled"
	RecyclerPod               = "RecyclerPod"
	VolumeDelete              = "VolumeDelete"
	VolumeFailedDelete        = "VolumeFailedDelete"
	ExternalProvisioning      = "ExternalProvisioning"
	ProvisioningFailed        = "ProvisioningFailed"
	ProvisioningCleanupFailed = "ProvisioningCleanupFailed"
	ProvisioningSucceeded     = "ProvisioningSucceeded"
	WaitForFirstConsumer      = "WaitForFirstConsumer"
	ExternalExpanding         = "ExternalExpanding"
)

View File

@ -1,211 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"k8s.io/api/core/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog"
)
// Names used to build the PV/PVC count metrics: the subsystem, the
// per-metric keys and the label names.
const (
	// Subsystem names.
	pvControllerSubsystem = "pv_collector"

	// Metric names.
	boundPVKey    = "bound_pv_count"
	unboundPVKey  = "unbound_pv_count"
	boundPVCKey   = "bound_pvc_count"
	unboundPVCKey = "unbound_pvc_count"

	// Label names.
	namespaceLabel    = "namespace"
	storageClassLabel = "storage_class"
)
// registerMetrics makes sure the registration performed by Register runs at
// most once.
var registerMetrics sync.Once
// PVLister is used by the collector to list persistent volumes.
// The collector only counts listed elements that are *v1.PersistentVolume;
// elements of any other type are skipped.
type PVLister interface {
List() []interface{}
}
// PVCLister is used by the collector to list persistent volume claims.
// The collector only counts listed elements that are
// *v1.PersistentVolumeClaim; elements of any other type are skipped.
type PVCLister interface {
List() []interface{}
}
// Register registers all metrics of the pv controller: the PV/PVC count
// collector built from the given listers, the volume operation latency
// histogram and the volume operation error counter. Registration happens on
// the first call only; the listers passed to later calls are ignored.
func Register(pvLister PVLister, pvcLister PVCLister) {
	registerMetrics.Do(func() {
		prometheus.MustRegister(
			newPVAndPVCCountCollector(pvLister, pvcLister),
			volumeOperationMetric,
			volumeOperationErrorsMetric,
		)
	})
}
// newPVAndPVCCountCollector returns a collector that counts the objects
// provided by the two listers.
func newPVAndPVCCountCollector(pvLister PVLister, pvcLister PVCLister) *pvAndPVCCountCollector {
	return &pvAndPVCCountCollector{
		pvLister:  pvLister,
		pvcLister: pvcLister,
	}
}
// pvAndPVCCountCollector is a custom collector for the current number of
// bound and unbound PersistentVolumes and PersistentVolumeClaims.
type pvAndPVCCountCollector struct {
// Cache for accessing information about PersistentVolumes.
pvLister PVLister
// Cache for accessing information about PersistentVolumeClaims.
pvcLister PVCLister
}
// Metric descriptors and vectors exported by the pv controller.
var (
	// PV counts are labelled by storage class.
	boundPVCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, boundPVKey),
		"Gauge measuring number of persistent volume currently bound",
		[]string{storageClassLabel}, nil)
	unboundPVCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, unboundPVKey),
		"Gauge measuring number of persistent volume currently unbound",
		[]string{storageClassLabel}, nil)

	// PVC counts are labelled by namespace.
	boundPVCCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, boundPVCKey),
		"Gauge measuring number of persistent volume claim currently bound",
		[]string{namespaceLabel}, nil)
	unboundPVCCountDesc = prometheus.NewDesc(
		prometheus.BuildFQName("", pvControllerSubsystem, unboundPVCKey),
		"Gauge measuring number of persistent volume claim currently unbound",
		[]string{namespaceLabel}, nil)

	// volumeOperationMetric observes the duration of volume operations,
	// labelled by plugin and operation name.
	volumeOperationMetric = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "volume_operation_total_seconds",
			Help: "Total volume operation time",
		},
		[]string{"plugin_name", "operation_name"})

	// volumeOperationErrorsMetric counts failed volume operations, labelled
	// by plugin and operation name.
	volumeOperationErrorsMetric = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "volume_operation_total_errors",
			// The help text is exposed on the metrics endpoint; it previously
			// read "erros".
			Help: "Total volume operation errors",
		},
		[]string{"plugin_name", "operation_name"})
)
// Describe sends the four PV/PVC count descriptors to ch, in the order bound
// PV, unbound PV, bound PVC, unbound PVC.
func (collector *pvAndPVCCountCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		boundPVCountDesc,
		unboundPVCountDesc,
		boundPVCCountDesc,
		unboundPVCCountDesc,
	}
	for _, desc := range descs {
		ch <- desc
	}
}
// Collect sends the current PV count metrics to ch, followed by the current
// PVC count metrics.
func (collector *pvAndPVCCountCollector) Collect(ch chan<- prometheus.Metric) {
	// Volumes first, claims second.
	collector.pvCollect(ch)
	collector.pvcCollect(ch)
}
// pvCollect counts the listed persistent volumes per storage class, split
// into bound (phase v1.VolumeBound) and unbound (any other phase), and sends
// one gauge per storage class to ch: all bound gauges first, then all unbound
// ones. Listed objects that are not *v1.PersistentVolume are skipped, and a
// gauge that cannot be created is logged and skipped.
func (collector *pvAndPVCCountCollector) pvCollect(ch chan<- prometheus.Metric) {
	bound := make(map[string]int)
	unbound := make(map[string]int)

	for _, obj := range collector.pvLister.List() {
		volume, isVolume := obj.(*v1.PersistentVolume)
		if !isVolume {
			continue
		}
		counts := unbound
		if volume.Status.Phase == v1.VolumeBound {
			counts = bound
		}
		counts[volume.Spec.StorageClassName]++
	}

	for class, count := range bound {
		gauge, err := prometheus.NewConstMetric(boundPVCountDesc, prometheus.GaugeValue, float64(count), class)
		if err != nil {
			klog.Warningf("Create bound pv number metric failed: %v", err)
			continue
		}
		ch <- gauge
	}

	for class, count := range unbound {
		gauge, err := prometheus.NewConstMetric(unboundPVCountDesc, prometheus.GaugeValue, float64(count), class)
		if err != nil {
			klog.Warningf("Create unbound pv number metric failed: %v", err)
			continue
		}
		ch <- gauge
	}
}
// pvcCollect counts the listed persistent volume claims per namespace, split
// into bound (phase v1.ClaimBound) and unbound (any other phase), and sends
// one gauge per namespace to ch: all bound gauges first, then all unbound
// ones. Listed objects that are not *v1.PersistentVolumeClaim are skipped,
// and a gauge that cannot be created is logged and skipped.
func (collector *pvAndPVCCountCollector) pvcCollect(ch chan<- prometheus.Metric) {
	bound := make(map[string]int)
	unbound := make(map[string]int)

	for _, obj := range collector.pvcLister.List() {
		claim, isClaim := obj.(*v1.PersistentVolumeClaim)
		if !isClaim {
			continue
		}
		counts := unbound
		if claim.Status.Phase == v1.ClaimBound {
			counts = bound
		}
		counts[claim.Namespace]++
	}

	for ns, count := range bound {
		gauge, err := prometheus.NewConstMetric(boundPVCCountDesc, prometheus.GaugeValue, float64(count), ns)
		if err != nil {
			klog.Warningf("Create bound pvc number metric failed: %v", err)
			continue
		}
		ch <- gauge
	}

	for ns, count := range unbound {
		gauge, err := prometheus.NewConstMetric(unboundPVCCountDesc, prometheus.GaugeValue, float64(count), ns)
		if err != nil {
			klog.Warningf("Create unbound pvc number metric failed: %v", err)
			continue
		}
		ch <- gauge
	}
}
// RecordVolumeOperationMetric records the latency and errors of volume operations.
func RecordVolumeOperationMetric(pluginName, opName string, timeTaken float64, err error) {
if pluginName == "" {
pluginName = "N/A"
}
if err != nil {
volumeOperationErrorsMetric.WithLabelValues(pluginName, opName).Inc()
return
}
volumeOperationMetric.WithLabelValues(pluginName, opName).Observe(timeTaken)
}

File diff suppressed because it is too large Load Diff

View File

@ -1,523 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
import (
"fmt"
"strconv"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics"
"k8s.io/kubernetes/pkg/util/goroutinemap"
vol "k8s.io/kubernetes/pkg/volume"
"k8s.io/klog"
)
// This file contains the controller base functionality, i.e. framework to
// process PV/PVC added/updated/deleted events. The real binding, provisioning,
// recycling and deleting is done in pv_controller.go
// ControllerParameters contains arguments for creation of a new
// PersistentVolume controller. NewController consumes it.
type ControllerParameters struct {
// KubeClient is stored as the controller's API client; it also backs the
// event sink when no EventRecorder is supplied.
KubeClient clientset.Interface
// SyncPeriod becomes the controller's resync period.
SyncPeriod time.Duration
// VolumePlugins are used to initialize the controller's volume plugin manager.
VolumePlugins []vol.VolumePlugin
// Cloud and ClusterName are copied into the controller unchanged.
Cloud cloudprovider.Interface
ClusterName string
// VolumeInformer and ClaimInformer get event handlers that enqueue work;
// their listers and HasSynced functions are also kept by the controller.
VolumeInformer coreinformers.PersistentVolumeInformer
ClaimInformer coreinformers.PersistentVolumeClaimInformer
// ClassInformer, PodInformer and NodeInformer only contribute their listers
// and HasSynced functions.
ClassInformer storageinformers.StorageClassInformer
PodInformer coreinformers.PodInformer
NodeInformer coreinformers.NodeInformer
// EventRecorder is optional; when nil, NewController creates a recorder with
// component name "persistentvolume-controller".
EventRecorder record.EventRecorder
// EnableDynamicProvisioning is copied into the controller unchanged.
EnableDynamicProvisioning bool
}
// NewController creates a new PersistentVolume controller from p.
//
// When p.EventRecorder is nil a recorder is created that logs events and
// records them through p.KubeClient. The volume and claim informers get
// handlers that enqueue every added, updated or deleted object on the
// matching work queue. An error is returned only when the volume plugins
// cannot be initialized.
func NewController(p ControllerParameters) (*PersistentVolumeController, error) {
	recorder := p.EventRecorder
	if recorder == nil {
		eventBroadcaster := record.NewBroadcaster()
		eventBroadcaster.StartLogging(klog.Infof)
		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: p.KubeClient.CoreV1().Events("")})
		recorder = eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "persistentvolume-controller"})
	}

	ctrl := &PersistentVolumeController{
		volumes:                       newPersistentVolumeOrderedIndex(),
		claims:                        cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
		kubeClient:                    p.KubeClient,
		eventRecorder:                 recorder,
		runningOperations:             goroutinemap.NewGoRoutineMap(true /* exponentialBackOffOnError */),
		cloud:                         p.Cloud,
		enableDynamicProvisioning:     p.EnableDynamicProvisioning,
		clusterName:                   p.ClusterName,
		createProvisionedPVRetryCount: createProvisionedPVRetryCount,
		createProvisionedPVInterval:   createProvisionedPVInterval,
		claimQueue:                    workqueue.NewNamed("claims"),
		volumeQueue:                   workqueue.NewNamed("volumes"),
		resyncPeriod:                  p.SyncPeriod,
	}

	// Prober is nil because PV is not aware of Flexvolume.
	if err := ctrl.volumePluginMgr.InitPlugins(p.VolumePlugins, nil /* prober */, ctrl); err != nil {
		return nil, fmt.Errorf("Could not initialize volume plugins for PersistentVolume Controller: %v", err)
	}

	// Every volume event, whatever its kind, just enqueues the object.
	volumeHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    func(obj interface{}) { ctrl.enqueueWork(ctrl.volumeQueue, obj) },
		UpdateFunc: func(_, newObj interface{}) { ctrl.enqueueWork(ctrl.volumeQueue, newObj) },
		DeleteFunc: func(obj interface{}) { ctrl.enqueueWork(ctrl.volumeQueue, obj) },
	}
	p.VolumeInformer.Informer().AddEventHandler(volumeHandlers)
	ctrl.volumeLister = p.VolumeInformer.Lister()
	ctrl.volumeListerSynced = p.VolumeInformer.Informer().HasSynced

	// Same for claims, on their own queue.
	claimHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    func(obj interface{}) { ctrl.enqueueWork(ctrl.claimQueue, obj) },
		UpdateFunc: func(_, newObj interface{}) { ctrl.enqueueWork(ctrl.claimQueue, newObj) },
		DeleteFunc: func(obj interface{}) { ctrl.enqueueWork(ctrl.claimQueue, obj) },
	}
	p.ClaimInformer.Informer().AddEventHandler(claimHandlers)
	ctrl.claimLister = p.ClaimInformer.Lister()
	ctrl.claimListerSynced = p.ClaimInformer.Informer().HasSynced

	// The remaining informers are only read through their listers.
	ctrl.classLister = p.ClassInformer.Lister()
	ctrl.classListerSynced = p.ClassInformer.Informer().HasSynced
	ctrl.podLister = p.PodInformer.Lister()
	ctrl.podListerSynced = p.PodInformer.Informer().HasSynced
	ctrl.NodeLister = p.NodeInformer.Lister()
	ctrl.NodeListerSynced = p.NodeInformer.Informer().HasSynced

	return ctrl, nil
}
// initializeCaches copies everything the two listers currently return into
// the controller's own volume and claim caches, so the caches are populated
// before the first addClaim/addVolume performs the initial synchronization.
// A listing failure is logged and aborts the initialization; a failure to
// store a single object is logged and the remaining objects are still
// processed.
func (ctrl *PersistentVolumeController) initializeCaches(volumeLister corelisters.PersistentVolumeLister, claimLister corelisters.PersistentVolumeClaimLister) {
	volumes, err := volumeLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("PersistentVolumeController can't initialize caches: %v", err)
		return
	}
	for _, pv := range volumes {
		// Store a copy, never the lister's object.
		if _, storeErr := ctrl.storeVolumeUpdate(pv.DeepCopy()); storeErr != nil {
			klog.Errorf("error updating volume cache: %v", storeErr)
		}
	}

	claims, err := claimLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("PersistentVolumeController can't initialize caches: %v", err)
		return
	}
	for _, pvc := range claims {
		if _, storeErr := ctrl.storeClaimUpdate(pvc.DeepCopy()); storeErr != nil {
			klog.Errorf("error updating claim cache: %v", storeErr)
		}
	}

	klog.V(4).Infof("controller initialized")
}
// enqueueWork adds the key of the given volume or claim to queue. A
// cache.DeletedFinalStateUnknown wrapper with a non-nil Obj is unwrapped
// first. Objects for which no key can be computed are logged and dropped.
func (ctrl *PersistentVolumeController) enqueueWork(queue workqueue.Interface, obj interface{}) {
	// Beware of "xxx deleted" events
	if tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown); isTombstone && tombstone.Obj != nil {
		obj = tombstone.Obj
	}

	key, err := controller.KeyFunc(obj)
	if err != nil {
		klog.Errorf("failed to get key from object: %v", err)
		return
	}

	klog.V(5).Infof("enqueued %q for sync", key)
	queue.Add(key)
}
// storeVolumeUpdate stores the given volume in the controller's volume cache
// through storeObjectUpdate and returns that function's result unchanged.
func (ctrl *PersistentVolumeController) storeVolumeUpdate(volume interface{}) (bool, error) {
	const className = "volume"
	return storeObjectUpdate(ctrl.volumes.store, volume, className)
}
// storeClaimUpdate stores the given claim in the controller's claim cache
// through storeObjectUpdate and returns that function's result unchanged.
func (ctrl *PersistentVolumeController) storeClaimUpdate(claim interface{}) (bool, error) {
	const className = "claim"
	return storeObjectUpdate(ctrl.claims, claim, className)
}
// updateVolume runs in worker thread and handles "volume added",
// "volume updated" and "periodic sync" events. The volume is first stored in
// the controller cache; it is synced only when the cache accepted it as a
// new (not older) version. Sync failures are logged, never returned.
func (ctrl *PersistentVolumeController) updateVolume(volume *v1.PersistentVolume) {
	accepted, err := ctrl.storeVolumeUpdate(volume)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !accepted {
		// An old version of the volume: nothing to process.
		return
	}

	err = ctrl.syncVolume(volume)
	if err == nil {
		return
	}
	if errors.IsConflict(err) {
		// Version conflict error happens quite often and the controller
		// recovers from it easily.
		klog.V(3).Infof("could not sync volume %q: %+v", volume.Name, err)
		return
	}
	klog.Errorf("could not sync volume %q: %+v", volume.Name, err)
}
// deleteVolume runs in worker thread and handles "volume deleted" event. The
// volume is removed from the controller cache and, when it references a
// claim, that claim is queued for a sync.
func (ctrl *PersistentVolumeController) deleteVolume(volume *v1.PersistentVolume) {
	_ = ctrl.volumes.store.Delete(volume)
	klog.V(4).Infof("volume %q deleted", volume.Name)

	claimRef := volume.Spec.ClaimRef
	if claimRef == nil {
		return
	}

	// sync the claim when its volume is deleted. Explicitly syncing the
	// claim here in response to volume deletion prevents the claim from
	// waiting until the next sync period for its Lost status.
	key := claimrefToClaimKey(claimRef)
	klog.V(5).Infof("deleteVolume[%s]: scheduling sync of claim %q", volume.Name, key)
	ctrl.claimQueue.Add(key)
}
// updateClaim runs in worker thread and handles "claim added",
// "claim updated" and "periodic sync" events. The claim is first stored in
// the controller cache; it is synced only when the cache accepted it as a
// new (not older) version. Sync failures are logged, never returned.
func (ctrl *PersistentVolumeController) updateClaim(claim *v1.PersistentVolumeClaim) {
	// Store the new claim version in the cache and do not process it if this is
	// an old version.
	isNew, err := ctrl.storeClaimUpdate(claim)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !isNew {
		return
	}

	err = ctrl.syncClaim(claim)
	if err == nil {
		return
	}
	if errors.IsConflict(err) {
		// Version conflict error happens quite often and the controller
		// recovers from it easily.
		klog.V(3).Infof("could not sync claim %q: %+v", claimToClaimKey(claim), err)
		return
	}
	// The message names the claim: it previously said "volume", which pointed
	// whoever read the log at the wrong object.
	klog.Errorf("could not sync claim %q: %+v", claimToClaimKey(claim), err)
}
// deleteClaim runs in worker thread and handles "claim deleted" event. The
// claim is removed from the controller cache and, when it names a volume,
// that volume is queued for a sync.
func (ctrl *PersistentVolumeController) deleteClaim(claim *v1.PersistentVolumeClaim) {
	_ = ctrl.claims.Delete(claim)
	klog.V(4).Infof("claim %q deleted", claimToClaimKey(claim))

	boundVolume := claim.Spec.VolumeName
	if boundVolume == "" {
		klog.V(5).Infof("deleteClaim[%q]: volume not bound", claimToClaimKey(claim))
		return
	}

	// sync the volume when its claim is deleted. Explicitly sync'ing the
	// volume here in response to claim deletion prevents the volume from
	// waiting until the next sync period for its Release.
	klog.V(5).Infof("deleteClaim[%q]: scheduling sync of volume %s", claimToClaimKey(claim), boundVolume)
	ctrl.volumeQueue.Add(boundVolume)
}
// Run starts all of this controller's control loops and blocks until stopCh
// is closed. It returns early, without starting any loop, when
// controller.WaitForCacheSync reports false.
func (ctrl *PersistentVolumeController) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
// Both work queues are shut down when Run returns.
defer ctrl.claimQueue.ShutDown()
defer ctrl.volumeQueue.ShutDown()
klog.Infof("Starting persistent volume controller")
defer klog.Infof("Shutting down persistent volume controller")
// Wait for the volume, claim, class, pod and node listers before doing any work.
if !controller.WaitForCacheSync("persistent volume", stopCh, ctrl.volumeListerSynced, ctrl.claimListerSynced, ctrl.classListerSynced, ctrl.podListerSynced, ctrl.NodeListerSynced) {
return
}
// Fill the controller's own caches from the listers before the workers start.
ctrl.initializeCaches(ctrl.volumeLister, ctrl.claimLister)
// One resync loop and a single worker per queue, each restarted by wait.Until
// until stopCh is closed.
go wait.Until(ctrl.resync, ctrl.resyncPeriod, stopCh)
go wait.Until(ctrl.volumeWorker, time.Second, stopCh)
go wait.Until(ctrl.claimWorker, time.Second, stopCh)
// The metrics collector reads the controller's own volume and claim caches.
metrics.Register(ctrl.volumes.store, ctrl.claims)
<-stopCh
}
// volumeWorker processes items from volumeQueue. It must run only once,
// syncVolume is not assured to be reentrant.
//
// A key still present in the informer is handled as add/update/sync; a key
// missing from the informer but present in the controller cache is handled
// as a delete. The worker returns once the queue reports shutdown.
func (ctrl *PersistentVolumeController) volumeWorker() {
	// processNext handles a single queue item and reports whether the queue
	// is shutting down.
	processNext := func() bool {
		item, shutdown := ctrl.volumeQueue.Get()
		if shutdown {
			return true
		}
		defer ctrl.volumeQueue.Done(item)

		key := item.(string)
		klog.V(5).Infof("volumeWorker[%s]", key)

		_, name, err := cache.SplitMetaNamespaceKey(key)
		if err != nil {
			klog.V(4).Infof("error getting name of volume %q to get volume from informer: %v", key, err)
			return false
		}

		current, err := ctrl.volumeLister.Get(name)
		switch {
		case err == nil:
			// The volume still exists in informer cache, the event must have
			// been add/update/sync
			ctrl.updateVolume(current)
			return false
		case !errors.IsNotFound(err):
			klog.V(2).Infof("error getting volume %q from informer: %v", key, err)
			return false
		}

		// The volume is not in informer cache, the event must have been
		// "delete"
		cached, found, err := ctrl.volumes.store.GetByKey(key)
		if err != nil {
			klog.V(2).Infof("error getting volume %q from cache: %v", key, err)
			return false
		}
		if !found {
			// The controller has already processed the delete event and
			// deleted the volume from its cache
			klog.V(2).Infof("deletion of volume %q was already processed", key)
			return false
		}

		deleted, isVolume := cached.(*v1.PersistentVolume)
		if !isVolume {
			klog.Errorf("expected volume, got %+v", cached)
			return false
		}
		ctrl.deleteVolume(deleted)
		return false
	}

	for !processNext() {
	}
	klog.Infof("volume worker queue shutting down")
}
// claimWorker processes items from claimQueue. It must run only once,
// syncClaim is not reentrant.
//
// A key still present in the informer is handled as add/update/sync; a key
// missing from the informer but present in the controller cache is handled
// as a delete. The worker returns once the queue reports shutdown.
func (ctrl *PersistentVolumeController) claimWorker() {
	// processNext handles a single queue item and reports whether the queue
	// is shutting down.
	processNext := func() bool {
		item, shutdown := ctrl.claimQueue.Get()
		if shutdown {
			return true
		}
		defer ctrl.claimQueue.Done(item)

		key := item.(string)
		klog.V(5).Infof("claimWorker[%s]", key)

		namespace, name, err := cache.SplitMetaNamespaceKey(key)
		if err != nil {
			klog.V(4).Infof("error getting namespace & name of claim %q to get claim from informer: %v", key, err)
			return false
		}

		current, err := ctrl.claimLister.PersistentVolumeClaims(namespace).Get(name)
		switch {
		case err == nil:
			// The claim still exists in informer cache, the event must have
			// been add/update/sync
			ctrl.updateClaim(current)
			return false
		case !errors.IsNotFound(err):
			klog.V(2).Infof("error getting claim %q from informer: %v", key, err)
			return false
		}

		// The claim is not in informer cache, the event must have been "delete"
		cached, found, err := ctrl.claims.GetByKey(key)
		if err != nil {
			klog.V(2).Infof("error getting claim %q from cache: %v", key, err)
			return false
		}
		if !found {
			// The controller has already processed the delete event and
			// deleted the claim from its cache
			klog.V(2).Infof("deletion of claim %q was already processed", key)
			return false
		}

		deleted, isClaim := cached.(*v1.PersistentVolumeClaim)
		if !isClaim {
			klog.Errorf("expected claim, got %+v", cached)
			return false
		}
		ctrl.deleteClaim(deleted)
		return false
	}

	for !processNext() {
	}
	klog.Infof("claim worker queue shutting down")
}
// resync supplements short resync period of shared informers - we don't want
// all consumers of PV/PVC shared informer to have a short resync period,
// therefore we do our own.
//
// All claims known to the claim lister are enqueued first, then all volumes
// known to the volume lister. A listing failure is logged and ends the
// resync at that point.
func (ctrl *PersistentVolumeController) resync() {
	klog.V(4).Infof("resyncing PV controller")

	claims, err := ctrl.claimLister.List(labels.NewSelector())
	if err != nil {
		klog.Warningf("cannot list claims: %s", err)
		return
	}
	for i := range claims {
		ctrl.enqueueWork(ctrl.claimQueue, claims[i])
	}

	volumes, err := ctrl.volumeLister.List(labels.NewSelector())
	if err != nil {
		klog.Warningf("cannot list persistent volumes: %s", err)
		return
	}
	for i := range volumes {
		ctrl.enqueueWork(ctrl.volumeQueue, volumes[i])
	}
}
// setClaimProvisioner saves
// claim.Annotations[annStorageProvisioner] = provisionerName
// through the API server and stores the returned claim in the controller
// cache. When the annotation already has that value the claim is returned
// untouched. On failure the claim returned by the API call is handed back
// together with the error.
func (ctrl *PersistentVolumeController) setClaimProvisioner(claim *v1.PersistentVolumeClaim, provisionerName string) (*v1.PersistentVolumeClaim, error) {
	if current, present := claim.Annotations[annStorageProvisioner]; present && current == provisionerName {
		// annotation is already set, nothing to do
		return claim, nil
	}

	// The claim from method args can be pointing to watcher cache. We must not
	// modify it, therefore annotate a copy.
	annotated := claim.DeepCopy()
	metav1.SetMetaDataAnnotation(&annotated.ObjectMeta, annStorageProvisioner, provisionerName)

	saved, err := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(annotated)
	if err != nil {
		return saved, err
	}
	if _, err = ctrl.storeClaimUpdate(saved); err != nil {
		return saved, err
	}
	return saved, nil
}
// Stateless functions
// getClaimStatusForLogging renders the claim's phase, the name of the volume
// it is bound to, and whether the annBindCompleted and annBoundByController
// annotations are present, as a single string for log messages.
func getClaimStatusForLogging(claim *v1.PersistentVolumeClaim) string {
	return fmt.Sprintf(
		"phase: %s, bound to: %q, bindCompleted: %v, boundByController: %v",
		claim.Status.Phase,
		claim.Spec.VolumeName,
		metav1.HasAnnotation(claim.ObjectMeta, annBindCompleted),
		metav1.HasAnnotation(claim.ObjectMeta, annBoundByController),
	)
}
// getVolumeStatusForLogging renders the volume's phase, the claim it
// references (empty when there is no ClaimRef) and whether the
// annBoundByController annotation is present, as a single string for log
// messages.
func getVolumeStatusForLogging(volume *v1.PersistentVolume) string {
	var boundTo string
	if ref := volume.Spec.ClaimRef; ref != nil {
		boundTo = fmt.Sprintf("%s/%s (uid: %s)", ref.Namespace, ref.Name, ref.UID)
	}
	return fmt.Sprintf(
		"phase: %s, bound to: %q, boundByController: %v",
		volume.Status.Phase,
		boundTo,
		metav1.HasAnnotation(volume.ObjectMeta, annBoundByController),
	)
}
// storeObjectUpdate updates given cache with a new object version from Informer
// callback (i.e. with events from etcd) or with an object modified by the
// controller itself. Returns "true", if the cache was updated, false if the
// object is an old version and should be ignored.
//
// Versions are compared as base-10 integers parsed from the objects'
// ResourceVersion. An error is returned when the key or metadata of an
// object cannot be obtained, a ResourceVersion does not parse, or the store
// fails.
func storeObjectUpdate(store cache.Store, obj interface{}, className string) (bool, error) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		return false, fmt.Errorf("Couldn't get key for object %+v: %v", obj, err)
	}

	cached, exists, err := store.Get(obj)
	if err != nil {
		return false, fmt.Errorf("Error finding %s %q in controller cache: %v", className, key, err)
	}

	incoming, err := meta.Accessor(obj)
	if err != nil {
		return false, err
	}
	incomingVersion := incoming.GetResourceVersion()

	if !exists {
		// This is a new object
		klog.V(4).Infof("storeObjectUpdate: adding %s %q, version %s", className, key, incomingVersion)
		if err = store.Add(obj); err != nil {
			return false, fmt.Errorf("Error adding %s %q to controller cache: %v", className, key, err)
		}
		return true, nil
	}

	existing, err := meta.Accessor(cached)
	if err != nil {
		return false, err
	}

	newVersion, err := strconv.ParseInt(incomingVersion, 10, 64)
	if err != nil {
		return false, fmt.Errorf("Error parsing ResourceVersion %q of %s %q: %s", incomingVersion, className, key, err)
	}
	cachedVersion := existing.GetResourceVersion()
	oldVersion, err := strconv.ParseInt(cachedVersion, 10, 64)
	if err != nil {
		return false, fmt.Errorf("Error parsing old ResourceVersion %q of %s %q: %s", cachedVersion, className, key, err)
	}

	// Throw away only older version, let the same version pass - we do want to
	// get periodic sync events.
	if oldVersion > newVersion {
		klog.V(4).Infof("storeObjectUpdate: ignoring %s %q version %s", className, key, incomingVersion)
		return false, nil
	}

	klog.V(4).Infof("storeObjectUpdate updating %s %q with version %s", className, key, incomingVersion)
	if err = store.Update(obj); err != nil {
		return false, fmt.Errorf("Error updating %s %q in controller cache: %v", className, key, err)
	}
	return true, nil
}

View File

@ -1,103 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
import (
"fmt"
"k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/scheme"
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/client-go/tools/reference"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// IsDelayBindingMode reports whether the claim's StorageClass uses the
// WaitForFirstConsumer volume binding mode.
//
// It returns (false, nil) when the claim names no class or when the class
// cannot be fetched from classLister, and an error when the class exists but
// has no VolumeBindingMode set.
func IsDelayBindingMode(claim *v1.PersistentVolumeClaim, classLister storagelisters.StorageClassLister) (bool, error) {
	name := v1helper.GetPersistentVolumeClaimClass(claim)
	if name == "" {
		return false, nil
	}

	sc, lookupErr := classLister.Get(name)
	switch {
	case lookupErr != nil:
		// NOTE(review): every lister error, not only "not found", is treated
		// as "no delayed binding" and is not propagated - confirm this is
		// the intended contract.
		return false, nil
	case sc.VolumeBindingMode == nil:
		return false, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", name)
	}

	mode := *sc.VolumeBindingMode
	return mode == storage.VolumeBindingWaitForFirstConsumer, nil
}
// GetBindVolumeToClaim builds a deep copy of volume that is bound to claim.
//
// The returned bool is true when the copy differs from the input, i.e. when
// the claim reference was (re)written or the annBoundByController annotation
// was added. The input volume is never modified. On failure to build the
// claim reference it returns (nil, false, err).
func GetBindVolumeToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) (*v1.PersistentVolume, bool, error) {
	// Decide from the untouched volume whether it was already bound to this
	// claim (by the user or by the controller); only if it was not may the
	// annotation be added below.
	annotate := !IsVolumeBoundToClaim(volume, claim)

	// volume may point into the watcher cache, which must not be mutated,
	// so all changes are applied to a copy.
	bound := volume.DeepCopy()
	changed := false

	// Point the copy at the claim unless name, namespace and UID all match
	// already.
	ref := volume.Spec.ClaimRef
	alreadyReferenced := ref != nil &&
		ref.Name == claim.Name &&
		ref.Namespace == claim.Namespace &&
		ref.UID == claim.UID
	if !alreadyReferenced {
		claimRef, err := reference.GetReference(scheme.Scheme, claim)
		if err != nil {
			return nil, false, fmt.Errorf("Unexpected error getting claim reference: %v", err)
		}
		bound.Spec.ClaimRef = claimRef
		changed = true
	}

	// Add annBoundByController if it is missing and this call established
	// the binding.
	if annotate && !metav1.HasAnnotation(bound.ObjectMeta, annBoundByController) {
		metav1.SetMetaDataAnnotation(&bound.ObjectMeta, annBoundByController, "yes")
		changed = true
	}

	return bound, changed, nil
}
// IsVolumeBoundToClaim reports whether volume is pre-bound or bound to the
// given claim. The claim reference on the volume must match the claim's name
// and namespace; when the reference also carries a UID, that must match too.
// A volume without a claim reference is never considered bound.
func IsVolumeBoundToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) bool {
	ref := volume.Spec.ClaimRef
	if ref == nil {
		return false
	}

	sameObject := ref.Name == claim.Name && ref.Namespace == claim.Namespace
	// An empty UID in the reference means "any UID".
	uidAccepted := ref.UID == "" || ref.UID == claim.UID
	return sameObject && uidAccepted
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2014 The Kubernetes Authors.
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,97 +18,132 @@ package persistentvolume
import (
"fmt"
"sort"
"k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/kubernetes/scheme"
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/client-go/tools/reference"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// persistentVolumeOrderedIndex is a cache.Store that keeps persistent volumes
// indexed by AccessModes and ordered by storage capacity.
type persistentVolumeOrderedIndex struct {
// store is the underlying indexer; newPersistentVolumeOrderedIndex creates
// it with an "accessmodes" index.
store cache.Indexer
}
const (
// AnnBindCompleted Annotation applies to PVCs. It indicates that the lifecycle
// of the PVC has passed through the initial setup. This information changes how
// we interpret some observations of the state of the objects. Value of this
// Annotation does not matter.
AnnBindCompleted = "pv.kubernetes.io/bind-completed"
func newPersistentVolumeOrderedIndex() persistentVolumeOrderedIndex {
	// Objects are keyed by cache.MetaNamespaceKeyFunc and additionally
	// indexed under "accessmodes" via accessModesIndexFunc.
	indexers := cache.Indexers{"accessmodes": accessModesIndexFunc}
	return persistentVolumeOrderedIndex{
		store: cache.NewIndexer(cache.MetaNamespaceKeyFunc, indexers),
	}
}
// AnnBoundByController annotation applies to PVs and PVCs. It indicates that
// the binding (PV->PVC or PVC->PV) was installed by the controller. The
// absence of this annotation means the binding was done by the user (i.e.
// pre-bound). Value of this annotation does not matter.
// External PV binders must bind PV the same way as PV controller, otherwise PV
// controller may not handle it correctly.
AnnBoundByController = "pv.kubernetes.io/bound-by-controller"
// accessModesIndexFunc is an index function that maps a persistent volume to
// a single index value: the string form of its AccessModes. For any object
// that is not a *v1.PersistentVolume it returns an error together with a
// one-element slice holding the empty string.
func accessModesIndexFunc(obj interface{}) ([]string, error) {
	pv, isPV := obj.(*v1.PersistentVolume)
	if !isPV {
		return []string{""}, fmt.Errorf("object is not a persistent volume: %v", obj)
	}
	return []string{v1helper.GetAccessModesAsString(pv.Spec.AccessModes)}, nil
}
// AnnSelectedNode annotation is added to a PVC that has been triggered by scheduler to
// be dynamically provisioned. Its value is the name of the selected node.
AnnSelectedNode = "volume.kubernetes.io/selected-node"
// listByAccessModes returns all volumes with the given set of
// AccessModeTypes. The list is unsorted!
func (pvIndex *persistentVolumeOrderedIndex) listByAccessModes(modes []v1.PersistentVolumeAccessMode) ([]*v1.PersistentVolume, error) {
pv := &v1.PersistentVolume{
Spec: v1.PersistentVolumeSpec{
AccessModes: modes,
},
// NotSupportedProvisioner is a special provisioner name which can be set
// in storage class to indicate dynamic provisioning is not supported by
// the storage.
NotSupportedProvisioner = "kubernetes.io/no-provisioner"
// AnnDynamicallyProvisioned annotation is added to a PV that has been dynamically provisioned by
// Kubernetes. Its value is name of volume plugin that created the volume.
// It serves both user (to show where a PV comes from) and Kubernetes (to
// recognize dynamically provisioned PVs in its decisions).
AnnDynamicallyProvisioned = "pv.kubernetes.io/provisioned-by"
// AnnStorageProvisioner annotation is added to a PVC that is supposed to be dynamically
// provisioned. Its value is name of volume plugin that is supposed to provision
// a volume for this PVC.
AnnStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner"
)
// IsDelayBindingMode checks if claim is in delay binding mode.
func IsDelayBindingMode(claim *v1.PersistentVolumeClaim, classLister storagelisters.StorageClassLister) (bool, error) {
className := v1helper.GetPersistentVolumeClaimClass(claim)
if className == "" {
return false, nil
}
objs, err := pvIndex.store.Index("accessmodes", pv)
class, err := classLister.Get(className)
if err != nil {
return nil, err
return false, nil
}
volumes := make([]*v1.PersistentVolume, len(objs))
for i, obj := range objs {
volumes[i] = obj.(*v1.PersistentVolume)
if class.VolumeBindingMode == nil {
return false, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", className)
}
return volumes, nil
return *class.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer, nil
}
// find returns the nearest PV from the ordered list or nil if a match is not found
func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
// PVs are indexed by their access modes to allow easier searching. Each
// index is the string representation of a set of access modes. There is a
// finite number of possible sets and PVs will only be indexed in one of
// them (whichever index matches the PV's modes).
//
// A request for resources will always specify its desired access modes.
// Any matching PV must have at least that number of access modes, but it
// can have more. For example, a user asks for ReadWriteOnce but a GCEPD
// is available, which is ReadWriteOnce+ReadOnlyMany.
//
// Searches are performed against a set of access modes, so we can attempt
// not only the exact matching modes but also potential matches (the GCEPD
// example above).
allPossibleModes := pvIndex.allPossibleMatchingAccessModes(claim.Spec.AccessModes)
// GetBindVolumeToClaim returns a new volume which is bound to given claim. In
// addition, it returns a bool which indicates whether we made modification on
// original volume.
func GetBindVolumeToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) (*v1.PersistentVolume, bool, error) {
dirty := false
for _, modes := range allPossibleModes {
volumes, err := pvIndex.listByAccessModes(modes)
if err != nil {
return nil, err
}
bestVol, err := findMatchingVolume(claim, volumes, nil /* node for topology binding*/, nil /* exclusion map */, delayBinding)
if err != nil {
return nil, err
}
if bestVol != nil {
return bestVol, nil
}
// Check if the volume was already bound (either by user or by controller)
shouldSetBoundByController := false
if !IsVolumeBoundToClaim(volume, claim) {
shouldSetBoundByController = true
}
return nil, nil
// The volume from method args can be pointing to watcher cache. We must not
// modify these, therefore create a copy.
volumeClone := volume.DeepCopy()
// Bind the volume to the claim if it is not bound yet
if volume.Spec.ClaimRef == nil ||
volume.Spec.ClaimRef.Name != claim.Name ||
volume.Spec.ClaimRef.Namespace != claim.Namespace ||
volume.Spec.ClaimRef.UID != claim.UID {
claimRef, err := reference.GetReference(scheme.Scheme, claim)
if err != nil {
return nil, false, fmt.Errorf("Unexpected error getting claim reference: %v", err)
}
volumeClone.Spec.ClaimRef = claimRef
dirty = true
}
// Set AnnBoundByController if it is not set yet
if shouldSetBoundByController && !metav1.HasAnnotation(volumeClone.ObjectMeta, AnnBoundByController) {
metav1.SetMetaDataAnnotation(&volumeClone.ObjectMeta, AnnBoundByController, "yes")
dirty = true
}
return volumeClone, dirty, nil
}
// findMatchingVolume goes through the list of volumes to find the best matching volume
// IsVolumeBoundToClaim tells whether the volume's claim reference points at
// the given claim. Name and namespace must be equal; a UID recorded in the
// reference must equal the claim's UID, while an empty UID matches any claim
// with that name and namespace. Volumes with no claim reference yield false.
func IsVolumeBoundToClaim(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) bool {
	ref := volume.Spec.ClaimRef
	switch {
	case ref == nil:
		return false
	case ref.Name != claim.Name, ref.Namespace != claim.Namespace:
		return false
	case ref.UID != "" && ref.UID != claim.UID:
		return false
	default:
		return true
	}
}
// FindMatchingVolume goes through the list of volumes to find the best matching volume
// for the claim.
//
// This function is used by both the PV controller and scheduler.
@ -122,7 +157,7 @@ func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVol
// excludedVolumes is only used in the scheduler path, and is needed for evaluating multiple
// unbound PVCs for a single Pod at one time. As each PVC finds a matching PV, the chosen
// PV needs to be excluded from future matching.
func findMatchingVolume(
func FindMatchingVolume(
claim *v1.PersistentVolumeClaim,
volumes []*v1.PersistentVolume,
node *v1.Node,
@ -159,7 +194,7 @@ func findMatchingVolume(
volumeQty := volume.Spec.Capacity[v1.ResourceStorage]
// check if volumeModes do not match (feature gate protected)
isMismatch, err := checkVolumeModeMismatches(&claim.Spec, &volume.Spec)
isMismatch, err := CheckVolumeModeMismatches(&claim.Spec, &volume.Spec)
if err != nil {
return nil, fmt.Errorf("error checking if volumeMode was a mismatch: %v", err)
}
@ -237,7 +272,7 @@ func findMatchingVolume(
if node != nil {
// Scheduler path
// Check that the access modes match
if !checkAccessModes(claim, volume) {
if !CheckAccessModes(claim, volume) {
continue
}
}
@ -258,9 +293,9 @@ func findMatchingVolume(
return nil, nil
}
// checkVolumeModeMismatches is a convenience method that checks volumeMode for PersistentVolume
// CheckVolumeModeMismatches is a convenience method that checks volumeMode for PersistentVolume
// and PersistentVolumeClaims
func checkVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) (bool, error) {
func CheckVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) (bool, error) {
if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
return false, nil
}
@ -278,93 +313,8 @@ func checkVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1
return requestedVolumeMode != pvVolumeMode, nil
}
// findBestMatchForClaim is a convenience method that finds a volume by the claim's AccessModes and requests for Storage.
// It forwards both arguments to findByClaim unchanged and returns its result.
func (pvIndex *persistentVolumeOrderedIndex) findBestMatchForClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
return pvIndex.findByClaim(claim, delayBinding)
}
// allPossibleMatchingAccessModes returns an array of AccessMode arrays that
// can satisfy a user's requested modes.
//
// see comments in the Find func above regarding indexing.
//
// allPossibleMatchingAccessModes gets all stringified accessmodes from the
// index and returns all those that contain at least all of the requested
// mode.
//
// For example, assume the index contains 2 types of PVs where the stringified
// accessmodes are:
//
// "RWO,ROX" -- some number of GCEPDs
// "RWO,ROX,RWX" -- some number of NFS volumes
//
// A request for RWO could be satisfied by both sets of indexed volumes, so
// allPossibleMatchingAccessModes returns:
//
// [][]v1.PersistentVolumeAccessMode {
// []v1.PersistentVolumeAccessMode {
// v1.ReadWriteOnce, v1.ReadOnlyMany,
// },
// []v1.PersistentVolumeAccessMode {
// v1.ReadWriteOnce, v1.ReadOnlyMany, v1.ReadWriteMany,
// },
// }
//
// A request for RWX can be satisfied by only one set of indexed volumes, so
// the return is:
//
// [][]v1.PersistentVolumeAccessMode {
// []v1.PersistentVolumeAccessMode {
// v1.ReadWriteOnce, v1.ReadOnlyMany, v1.ReadWriteMany,
// },
// }
//
// This func returns modes with ascending levels of modes to give the user
// what is closest to what they actually asked for.
func (pvIndex *persistentVolumeOrderedIndex) allPossibleMatchingAccessModes(requestedModes []v1.PersistentVolumeAccessMode) [][]v1.PersistentVolumeAccessMode {
	candidates := [][]v1.PersistentVolumeAccessMode{}

	// Every value of the "accessmodes" index is a stringified mode set; keep
	// the sets that AccessModesContainedInAll accepts for the request.
	for _, indexKey := range pvIndex.store.ListIndexFuncValues("accessmodes") {
		modes := v1helper.GetAccessModesFromString(indexKey)
		if !volumeutil.AccessModesContainedInAll(modes, requestedModes) {
			continue
		}
		candidates = append(candidates, modes)
	}

	// Order the sets by size, fewest modes first, so that callers try the
	// minimal matching set of modes before the broader ones.
	sort.Sort(byAccessModes{candidates})
	return candidates
}
// byAccessModes is used to order access modes by size, with the fewest modes first.
// Its Len, Less and Swap methods are what sort.Sort is given in
// allPossibleMatchingAccessModes.
type byAccessModes struct {
// modes is the list of access-mode sets being sorted in place.
modes [][]v1.PersistentVolumeAccessMode
}
// Less reports whether the set at index i has fewer modes than the set at index j.
func (c byAccessModes) Less(i, j int) bool {
return len(c.modes[i]) < len(c.modes[j])
}
// Swap exchanges the sets at indexes i and j.
func (c byAccessModes) Swap(i, j int) {
c.modes[i], c.modes[j] = c.modes[j], c.modes[i]
}
// Len returns the number of access-mode sets.
func (c byAccessModes) Len() int {
return len(c.modes)
}
// claimToClaimKey builds the "<namespace>/<name>" key of a claim.
func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
	return claim.Namespace + "/" + claim.Name
}
// claimrefToClaimKey builds the "<namespace>/<name>" key of the object a
// claim reference points to.
func claimrefToClaimKey(claimref *v1.ObjectReference) string {
	return claimref.Namespace + "/" + claimref.Name
}
// Returns true if PV satisfies all the PVC's requested AccessModes
func checkAccessModes(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolume) bool {
// CheckAccessModes returns true if PV satisfies all the PVC's requested AccessModes
func CheckAccessModes(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolume) bool {
pvModesMap := map[v1.PersistentVolumeAccessMode]bool{}
for _, mode := range volume.Spec.AccessModes {
pvModesMap[mode] = true
@ -378,3 +328,26 @@ func checkAccessModes(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolu
}
return true
}
// claimToClaimKey returns the claim's namespace and name joined by a slash.
func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
	namespace, name := claim.Namespace, claim.Name
	return namespace + "/" + name
}
// GetVolumeNodeAffinity builds a VolumeNodeAffinity whose required node
// selector has exactly one term with one match expression: key "In" [value].
func GetVolumeNodeAffinity(key string, value string) *v1.VolumeNodeAffinity {
	requirement := v1.NodeSelectorRequirement{
		Key:      key,
		Operator: v1.NodeSelectorOpIn,
		Values:   []string{value},
	}
	term := v1.NodeSelectorTerm{
		MatchExpressions: []v1.NodeSelectorRequirement{requirement},
	}
	selector := &v1.NodeSelector{
		NodeSelectorTerms: []v1.NodeSelectorTerm{term},
	}
	return &v1.VolumeNodeAffinity{Required: selector}
}

View File

@ -1,138 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
import (
"fmt"
"net"
authenticationv1 "k8s.io/api/authentication/v1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/util/mount"
vol "k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util/subpath"
)
// VolumeHost interface implementation for PersistentVolumeController.
var _ vol.VolumeHost = &PersistentVolumeController{}
// GetPluginDir always returns an empty string; pluginName is ignored.
func (ctrl *PersistentVolumeController) GetPluginDir(pluginName string) string {
return ""
}
// GetVolumeDevicePluginDir always returns an empty string; pluginName is ignored.
func (ctrl *PersistentVolumeController) GetVolumeDevicePluginDir(pluginName string) string {
return ""
}
// GetPodsDir always returns an empty string.
func (ctrl *PersistentVolumeController) GetPodsDir() string {
return ""
}
// GetPodVolumeDir always returns an empty string; all arguments are ignored.
func (ctrl *PersistentVolumeController) GetPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
return ""
}
// GetPodPluginDir always returns an empty string; all arguments are ignored.
func (ctrl *PersistentVolumeController) GetPodPluginDir(podUID types.UID, pluginName string) string {
return ""
}
// GetPodVolumeDeviceDir always returns an empty string; all arguments are
// ignored.
func (ctrl *PersistentVolumeController) GetPodVolumeDeviceDir(podUID types.UID, pluginName string) string {
	return ""
}
// GetKubeClient returns the controller's kubeClient field.
func (ctrl *PersistentVolumeController) GetKubeClient() clientset.Interface {
return ctrl.kubeClient
}
// NewWrapperMounter is not implemented: it ignores its arguments and always
// returns a nil Mounter together with an error.
func (ctrl *PersistentVolumeController) NewWrapperMounter(volName string, spec vol.Spec, pod *v1.Pod, opts vol.VolumeOptions) (vol.Mounter, error) {
return nil, fmt.Errorf("PersistentVolumeController.NewWrapperMounter is not implemented")
}
// NewWrapperUnmounter is not implemented: it ignores its arguments and always
// returns a nil Unmounter together with an error naming this method.
func (ctrl *PersistentVolumeController) NewWrapperUnmounter(volName string, spec vol.Spec, podUID types.UID) (vol.Unmounter, error) {
	// The message previously named NewWrapperMounter, which pointed readers
	// of the error at the wrong method.
	return nil, fmt.Errorf("PersistentVolumeController.NewWrapperUnmounter is not implemented")
}
// GetCloudProvider returns the controller's cloud field.
func (ctrl *PersistentVolumeController) GetCloudProvider() cloudprovider.Interface {
return ctrl.cloud
}
// GetMounter always returns nil; pluginName is ignored.
func (ctrl *PersistentVolumeController) GetMounter(pluginName string) mount.Interface {
return nil
}
// GetHostName always returns an empty string.
func (ctrl *PersistentVolumeController) GetHostName() string {
return ""
}
// GetHostIP is not implemented: it always returns a nil IP and an error.
func (ctrl *PersistentVolumeController) GetHostIP() (net.IP, error) {
return nil, fmt.Errorf("PersistentVolumeController.GetHostIP() is not implemented")
}
// GetNodeAllocatable always returns an empty, non-nil ResourceList and a nil
// error.
func (ctrl *PersistentVolumeController) GetNodeAllocatable() (v1.ResourceList, error) {
return v1.ResourceList{}, nil
}
// GetSecretFunc returns a function that ignores its namespace and name
// arguments and always fails with a "GetSecret unsupported" error.
func (ctrl *PersistentVolumeController) GetSecretFunc() func(namespace, name string) (*v1.Secret, error) {
return func(_, _ string) (*v1.Secret, error) {
return nil, fmt.Errorf("GetSecret unsupported in PersistentVolumeController")
}
}
// GetConfigMapFunc returns a function that ignores its namespace and name
// arguments and always fails with a "GetConfigMap unsupported" error.
func (ctrl *PersistentVolumeController) GetConfigMapFunc() func(namespace, name string) (*v1.ConfigMap, error) {
return func(_, _ string) (*v1.ConfigMap, error) {
return nil, fmt.Errorf("GetConfigMap unsupported in PersistentVolumeController")
}
}
// GetServiceAccountTokenFunc returns a function that ignores all of its
// arguments and always fails with a "GetServiceAccountToken unsupported" error.
func (ctrl *PersistentVolumeController) GetServiceAccountTokenFunc() func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) {
return func(_, _ string, _ *authenticationv1.TokenRequest) (*authenticationv1.TokenRequest, error) {
return nil, fmt.Errorf("GetServiceAccountToken unsupported in PersistentVolumeController")
}
}
// DeleteServiceAccountTokenFunc returns a function that ignores the UID it is
// given and only logs an error through klog saying the operation is
// unsupported.
func (ctrl *PersistentVolumeController) DeleteServiceAccountTokenFunc() func(types.UID) {
return func(types.UID) {
klog.Errorf("DeleteServiceAccountToken unsupported in PersistentVolumeController")
}
}
// GetExec returns the result of mount.NewOsExec(); pluginName is ignored.
func (ctrl *PersistentVolumeController) GetExec(pluginName string) mount.Exec {
	return mount.NewOsExec()
}
// GetNodeLabels is unsupported: it always returns a nil map and an error.
func (ctrl *PersistentVolumeController) GetNodeLabels() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeLabels() unsupported in PersistentVolumeController")
}
// GetNodeName always returns an empty node name.
func (ctrl *PersistentVolumeController) GetNodeName() types.NodeName {
return ""
}
// GetEventRecorder returns the controller's eventRecorder field.
func (ctrl *PersistentVolumeController) GetEventRecorder() record.EventRecorder {
return ctrl.eventRecorder
}
// GetSubpather always returns nil.
func (ctrl *PersistentVolumeController) GetSubpather() subpath.Interface {
// No volume plugin needs Subpaths in PV controller.
return nil
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
package scheduling
import (
"fmt"
@ -127,7 +127,8 @@ func (c *assumeCache) objInfoIndexFunc(obj interface{}) ([]string, error) {
return c.indexFunc(objInfo.latestObj)
}
func NewAssumeCache(informer cache.SharedIndexInformer, description, indexName string, indexFunc cache.IndexFunc) *assumeCache {
// NewAssumeCache creates an assume cache for general objects.
func NewAssumeCache(informer cache.SharedIndexInformer, description, indexName string, indexFunc cache.IndexFunc) AssumeCache {
c := &assumeCache{
description: description,
indexFunc: indexFunc,
@ -344,7 +345,7 @@ type PVAssumeCache interface {
}
type pvAssumeCache struct {
*assumeCache
AssumeCache
}
func pvStorageClassIndexFunc(obj interface{}) ([]string, error) {
@ -354,8 +355,9 @@ func pvStorageClassIndexFunc(obj interface{}) ([]string, error) {
return []string{""}, fmt.Errorf("object is not a v1.PersistentVolume: %v", obj)
}
// NewPVAssumeCache creates a PV assume cache.
func NewPVAssumeCache(informer cache.SharedIndexInformer) PVAssumeCache {
return &pvAssumeCache{assumeCache: NewAssumeCache(informer, "v1.PersistentVolume", "storageclass", pvStorageClassIndexFunc)}
return &pvAssumeCache{NewAssumeCache(informer, "v1.PersistentVolume", "storageclass", pvStorageClassIndexFunc)}
}
func (c *pvAssumeCache) GetPV(pvName string) (*v1.PersistentVolume, error) {
@ -411,11 +413,12 @@ type PVCAssumeCache interface {
}
type pvcAssumeCache struct {
*assumeCache
AssumeCache
}
// NewPVCAssumeCache creates a PVC assume cache.
func NewPVCAssumeCache(informer cache.SharedIndexInformer) PVCAssumeCache {
return &pvcAssumeCache{assumeCache: NewAssumeCache(informer, "v1.PersistentVolumeClaim", "namespace", cache.MetaNamespaceIndexFunc)}
return &pvcAssumeCache{NewAssumeCache(informer, "v1.PersistentVolumeClaim", "namespace", cache.MetaNamespaceIndexFunc)}
}
func (c *pvcAssumeCache) GetPVC(pvcKey string) (*v1.PersistentVolumeClaim, error) {

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
package scheduling
import (
"github.com/prometheus/client_golang/prometheus"
@ -24,6 +24,7 @@ import (
const VolumeSchedulerSubsystem = "scheduler_volume"
var (
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,
@ -32,6 +33,7 @@ var (
},
[]string{"operation"},
)
// VolumeSchedulingStageLatency tracks the latency of volume scheduling operations.
VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: VolumeSchedulerSubsystem,
@ -41,6 +43,7 @@ var (
},
[]string{"operation"},
)
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
VolumeSchedulingStageFailed = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
package scheduling
import (
"fmt"
@ -32,6 +32,7 @@ import (
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pvutil "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/util"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
@ -139,15 +140,6 @@ func (b *volumeBinder) GetBindingsCache() PodBindingCache {
return b.podBindingCache
}
func podHasClaims(pod *v1.Pod) bool {
for _, vol := range pod.Spec.Volumes {
if vol.PersistentVolumeClaim != nil {
return true
}
}
return false
}
// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
// That's necessary because some operations will need to pass in to the predicate fake node objects.
@ -168,32 +160,27 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
}
}()
if !podHasClaims(pod) {
// Fast path
return unboundVolumesSatisfied, boundVolumesSatisfied, nil
}
var (
matchedClaims []*bindingInfo
matchedBindings []*bindingInfo
provisionedClaims []*v1.PersistentVolumeClaim
)
defer func() {
// We recreate bindings for each new schedule loop.
if len(matchedClaims) == 0 && len(provisionedClaims) == 0 {
if len(matchedBindings) == 0 && len(provisionedClaims) == 0 {
// Clear cache if no claims to bind or provision for this node.
b.podBindingCache.ClearBindings(pod, node.Name)
return
}
// Although we do not distinguish nil from empty in this function, for
// easier testing, we normalize empty to nil.
if len(matchedClaims) == 0 {
matchedClaims = nil
if len(matchedBindings) == 0 {
matchedBindings = nil
}
if len(provisionedClaims) == 0 {
provisionedClaims = nil
}
// Mark cache with all matched and provisioned claims for this node
b.podBindingCache.UpdateBindings(pod, node.Name, matchedClaims, provisionedClaims)
b.podBindingCache.UpdateBindings(pod, node.Name, matchedBindings, provisionedClaims)
}()
// The pod's volumes need to be processed in one call to avoid the race condition where
@ -225,7 +212,7 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
// Filter out claims to provision
for _, claim := range claimsToBind {
if selectedNode, ok := claim.Annotations[annSelectedNode]; ok {
if selectedNode, ok := claim.Annotations[pvutil.AnnSelectedNode]; ok {
if selectedNode != node.Name {
// Fast path, skip unmatched node
return false, boundVolumesSatisfied, nil
@ -239,7 +226,7 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
// Find matching volumes
if len(claimsToFindMatching) > 0 {
var unboundClaims []*v1.PersistentVolumeClaim
unboundVolumesSatisfied, matchedClaims, unboundClaims, err = b.findMatchingVolumes(pod, claimsToFindMatching, node)
unboundVolumesSatisfied, matchedBindings, unboundClaims, err = b.findMatchingVolumes(pod, claimsToFindMatching, node)
if err != nil {
return false, false, err
}
@ -288,7 +275,7 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
// Assume PV
newBindings := []*bindingInfo{}
for _, binding := range claimsToBind {
newPV, dirty, err := GetBindVolumeToClaim(binding.pv, binding.pvc)
newPV, dirty, err := pvutil.GetBindVolumeToClaim(binding.pv, binding.pvc)
klog.V(5).Infof("AssumePodVolumes: GetBindVolumeToClaim for pod %q, PV %q, PVC %q. newPV %p, dirty %v, err: %v",
podName,
binding.pv.Name,
@ -317,7 +304,7 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
// The claims from method args can be pointing to watcher cache. We must not
// modify these, therefore create a copy.
claimClone := claim.DeepCopy()
metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, annSelectedNode, nodeName)
metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnSelectedNode, nodeName)
err = b.pvcCache.Assume(claimClone)
if err != nil {
b.revertAssumedPVs(newBindings)
@ -410,14 +397,14 @@ func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, cl
// TODO: does it hurt if we make an api call and nothing needs to be updated?
claimKey := claimToClaimKey(binding.pvc)
klog.V(2).Infof("claim %q bound to volume %q", claimKey, binding.pv.Name)
if newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(binding.pv); err != nil {
newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(binding.pv)
if err != nil {
klog.V(4).Infof("updating PersistentVolume[%s]: binding to %q failed: %v", binding.pv.Name, claimKey, err)
return err
} else {
klog.V(4).Infof("updating PersistentVolume[%s]: bound to %q", binding.pv.Name, claimKey)
// Save updated object from apiserver for later checking.
binding.pv = newPV
}
klog.V(4).Infof("updating PersistentVolume[%s]: bound to %q", binding.pv.Name, claimKey)
// Save updated object from apiserver for later checking.
binding.pv = newPV
lastProcessedBinding++
}
@ -425,12 +412,12 @@ func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, cl
// PV controller is expected to signal back by removing related annotations if actual provisioning fails
for i, claim = range claimsToProvision {
klog.V(5).Infof("bindAPIUpdate: Pod %q, PVC %q", podName, getPVCName(claim))
if newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(claim); err != nil {
newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(claim)
if err != nil {
return err
} else {
// Save updated object from apiserver for later checking.
claimsToProvision[i] = newClaim
}
// Save updated object from apiserver for later checking.
claimsToProvision[i] = newClaim
lastProcessedProvisioning++
}
@ -525,7 +512,7 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
if pvc.Annotations == nil {
return false, fmt.Errorf("selectedNode annotation reset for PVC %q", pvc.Name)
}
selectedNode := pvc.Annotations[annSelectedNode]
selectedNode := pvc.Annotations[pvutil.AnnSelectedNode]
if selectedNode != pod.Spec.NodeName {
return false, fmt.Errorf("selectedNode annotation value %q not set to scheduled node %q", selectedNode, pod.Spec.NodeName)
}
@ -540,9 +527,8 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
// And if PV does not exist because it's deleted, PVC will
// be unbound eventually.
return false, nil
} else {
return false, fmt.Errorf("failed to get pv %q from cache: %v", pvc.Spec.VolumeName, err)
}
return false, fmt.Errorf("failed to get pv %q from cache: %v", pvc.Spec.VolumeName, err)
}
if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
@ -596,7 +582,7 @@ func (b *volumeBinder) isPVCBound(namespace, pvcName string) (bool, *v1.Persiste
}
// isPVCFullyBound reports whether the given claim has a non-empty
// Spec.VolumeName and carries the bind-completed annotation.
func (b *volumeBinder) isPVCFullyBound(pvc *v1.PersistentVolumeClaim) bool {
return pvc.Spec.VolumeName != "" && metav1.HasAnnotation(pvc.ObjectMeta, annBindCompleted)
// NOTE(review): the statement below is unreachable because the function has
// already returned above. It looks like the old/new pair of a single diff
// line (annBindCompleted vs pvutil.AnnBindCompleted) — confirm which one is
// meant to remain.
return pvc.Spec.VolumeName != "" && metav1.HasAnnotation(pvc.ObjectMeta, pvutil.AnnBindCompleted)
}
// arePodVolumesBound returns true if all volumes are fully bound
@ -612,10 +598,10 @@ func (b *volumeBinder) arePodVolumesBound(pod *v1.Pod) bool {
// getPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
// and unbound with immediate binding (including prebound)
func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaims []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaimsDelayBinding []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
boundClaims = []*v1.PersistentVolumeClaim{}
unboundClaimsImmediate = []*v1.PersistentVolumeClaim{}
unboundClaims = []*v1.PersistentVolumeClaim{}
unboundClaimsDelayBinding = []*v1.PersistentVolumeClaim{}
for _, vol := range pod.Spec.Volumes {
volumeBound, pvc, err := b.isVolumeBound(pod.Namespace, &vol)
@ -628,14 +614,14 @@ func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentV
if volumeBound {
boundClaims = append(boundClaims, pvc)
} else {
delayBindingMode, err := IsDelayBindingMode(pvc, b.classLister)
delayBindingMode, err := pvutil.IsDelayBindingMode(pvc, b.classLister)
if err != nil {
return nil, nil, nil, err
}
// Prebound PVCs are treated as unbound immediate binding
if delayBindingMode && pvc.Spec.VolumeName == "" {
// Scheduler path
unboundClaims = append(unboundClaims, pvc)
unboundClaimsDelayBinding = append(unboundClaimsDelayBinding, pvc)
} else {
// !delayBindingMode || pvc.Spec.VolumeName != ""
// Immediate binding should have already been bound
@ -643,7 +629,7 @@ func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentV
}
}
}
return boundClaims, unboundClaims, unboundClaimsImmediate, nil
return boundClaims, unboundClaimsDelayBinding, unboundClaimsImmediate, nil
}
func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node *v1.Node, podName string) (bool, error) {
@ -668,7 +654,7 @@ func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node
// findMatchingVolumes tries to find matching volumes for given claims,
// and return unbound claims for further provision.
func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (foundMatches bool, matchedClaims []*bindingInfo, unboundClaims []*v1.PersistentVolumeClaim, err error) {
func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (foundMatches bool, bindings []*bindingInfo, unboundClaims []*v1.PersistentVolumeClaim, err error) {
podName := getPodName(pod)
// Sort all the claims by increasing size request to get the smallest fits
sort.Sort(byPVCSize(claimsToBind))
@ -676,7 +662,6 @@ func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.Persi
chosenPVs := map[string]*v1.PersistentVolume{}
foundMatches = true
matchedClaims = []*bindingInfo{}
for _, pvc := range claimsToBind {
// Get storage class name from each PVC
@ -689,7 +674,7 @@ func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.Persi
pvcName := getPVCName(pvc)
// Find a matching PV
pv, err := findMatchingVolume(pvc, allPVs, node, chosenPVs, true)
pv, err := pvutil.FindMatchingVolume(pvc, allPVs, node, chosenPVs, true)
if err != nil {
return false, nil, nil, err
}
@ -702,7 +687,7 @@ func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.Persi
// matching PV needs to be excluded so we don't select it again
chosenPVs[pv.Name] = pv
matchedClaims = append(matchedClaims, &bindingInfo{pv: pv, pvc: pvc})
bindings = append(bindings, &bindingInfo{pv: pv, pvc: pvc})
klog.V(5).Infof("Found matching PV %q for PVC %q on node %q for pod %q", pv.Name, pvcName, node.Name, podName)
}
@ -732,7 +717,7 @@ func (b *volumeBinder) checkVolumeProvisions(pod *v1.Pod, claimsToProvision []*v
return false, nil, fmt.Errorf("failed to find storage class %q", className)
}
provisioner := class.Provisioner
if provisioner == "" || provisioner == notSupportedProvisioner {
if provisioner == "" || provisioner == pvutil.NotSupportedProvisioner {
klog.V(4).Infof("storage class %q of claim %q does not support dynamic provisioning", className, pvcName)
return false, nil, nil
}
@ -790,3 +775,7 @@ func (a byPVCSize) Less(i, j int) bool {
// return true if iSize is less than jSize
return iSize.Cmp(jSize) == -1
}
// claimToClaimKey builds the "<namespace>/<name>" key for the given claim.
func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
	namespace, name := claim.Namespace, claim.Name
	key := fmt.Sprintf("%s/%s", namespace, name)
	return key
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
package scheduling
import (
"sync"
@ -22,7 +22,7 @@ import (
"k8s.io/api/core/v1"
)
// podBindingCache stores PV binding decisions per pod per node.
// PodBindingCache stores PV binding decisions per pod per node.
// Pod entries are removed when the Pod is deleted or updated to
// no longer be schedulable.
type PodBindingCache interface {
@ -69,6 +69,7 @@ type nodeDecision struct {
provisionings []*v1.PersistentVolumeClaim
}
// NewPodBindingCache returns a PodBindingCache backed by a podBindingCache
// whose binding-decision map is initialized and empty.
func NewPodBindingCache() PodBindingCache {
	cache := &podBindingCache{}
	cache.bindingDecisions = map[string]nodeDecisions{}
	return cache
}

View File

@ -14,10 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
package scheduling
import "k8s.io/api/core/v1"
// FakeVolumeBinderConfig holds configurations for fake volume binder.
type FakeVolumeBinderConfig struct {
AllBound bool
FindUnboundSatsified bool
@ -27,7 +28,7 @@ type FakeVolumeBinderConfig struct {
BindErr error
}
// NewVolumeBinder sets up all the caches needed for the scheduler to make
// NewFakeVolumeBinder sets up all the caches needed for the scheduler to make
// topology-aware volume binding decisions.
func NewFakeVolumeBinder(config *FakeVolumeBinderConfig) *FakeVolumeBinder {
return &FakeVolumeBinder{
@ -35,26 +36,31 @@ func NewFakeVolumeBinder(config *FakeVolumeBinderConfig) *FakeVolumeBinder {
}
}
// FakeVolumeBinder represents a fake volume binder for testing.
// Its methods return the values held in config; AssumePodVolumes and
// BindPodVolumes also record that they were called.
type FakeVolumeBinder struct {
// config holds the canned results that the fake's methods return.
config *FakeVolumeBinderConfig
// AssumeCalled is set to true when AssumePodVolumes is invoked.
AssumeCalled bool
// BindCalled is set to true when BindPodVolumes is invoked.
BindCalled bool
}
// FindPodVolumes implements SchedulerVolumeBinder.FindPodVolumes.
// The fake ignores pod and node and reports the FindUnboundSatsified,
// FindBoundSatsified and FindErr values from its config.
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatsified bool, err error) {
	cfg := b.config
	unboundVolumesSatisfied = cfg.FindUnboundSatsified
	boundVolumesSatsified = cfg.FindBoundSatsified
	err = cfg.FindErr
	return unboundVolumesSatisfied, boundVolumesSatsified, err
}
// AssumePodVolumes implements SchedulerVolumeBinder.AssumePodVolumes.
// It records the call in AssumeCalled, then returns the AllBound and
// AssumeErr values from the fake's config; both arguments are ignored.
func (b *FakeVolumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (bool, error) {
	// Mark the call before reading config, matching the order callers observe.
	b.AssumeCalled = true
	cfg := b.config
	return cfg.AllBound, cfg.AssumeErr
}
// BindPodVolumes implements SchedulerVolumeBinder.BindPodVolumes.
// It records the call in BindCalled and returns the BindErr value from the
// fake's config; assumedPod is ignored.
func (b *FakeVolumeBinder) BindPodVolumes(assumedPod *v1.Pod) error {
	b.BindCalled = true
	bindErr := b.config.BindErr
	return bindErr
}
// GetBindingsCache implements SchedulerVolumeBinder.GetBindingsCache.
// The fake keeps no binding cache, so the result is always nil.
func (b *FakeVolumeBinder) GetBindingsCache() PodBindingCache {
	var cache PodBindingCache // zero value of the interface type, i.e. nil
	return cache
}