2018-01-09 18:57:14 +00:00
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kubeletconfig
import (
"fmt"
"path/filepath"
"time"
2018-07-18 14:47:22 +00:00
apiequality "k8s.io/apimachinery/pkg/api/equality"
2018-01-09 18:57:14 +00:00
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
2018-03-06 22:33:18 +00:00
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
2018-01-09 18:57:14 +00:00
"k8s.io/client-go/tools/cache"
2018-11-26 18:23:56 +00:00
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/apis/config/validation"
2018-01-09 18:57:14 +00:00
2018-03-06 22:33:18 +00:00
"k8s.io/kubernetes/pkg/kubelet/kubeletconfig/checkpoint"
2018-01-09 18:57:14 +00:00
"k8s.io/kubernetes/pkg/kubelet/kubeletconfig/checkpoint/store"
"k8s.io/kubernetes/pkg/kubelet/kubeletconfig/status"
utillog "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/log"
utilpanic "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/panic"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
)
const (
	// storeDir names the subdirectory of the dynamic config directory that
	// NewController hands to the filesystem-backed checkpoint store.
	storeDir = "store"

	// configTrialDuration is the trial period passed to checkTrial for an
	// assigned config before it may graduate to last-known-good.
	// TODO(mtaufen): We may expose this in a future API, but for the time being we use an internal default,
	// because it is not especially clear where this should live in the API.
	configTrialDuration = time.Minute * 10
)
2018-07-18 14:47:22 +00:00
// TransformFunc mutates the given KubeletConfiguration in place. A non-nil
// return value means at least one of the transformations could not be applied.
type TransformFunc func(config *kubeletconfig.KubeletConfiguration) error
2018-03-06 22:33:18 +00:00
// Controller manages syncing dynamic Kubelet configurations
2018-01-09 18:57:14 +00:00
// For more information, see the proposal: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/dynamic-kubelet-configuration.md
type Controller struct {
2018-07-18 14:47:22 +00:00
// transform applies an arbitrary transformation to config after loading, and before validation.
// This can be used, for example, to include config from flags before the controller's validation step.
// If transform returns an error, loadConfig will fail, and an InternalError will be reported.
// Be wary if using this function as an extension point, in most cases the controller should
// probably just be natively extended to do what you need. Injecting flag precedence transformations
// is something of an exception because the caller of this controller (cmd/) is aware of flags, but this
// controller's tree (pkg/) is not.
transform TransformFunc
2018-01-09 18:57:14 +00:00
// pendingConfigSource; write to this channel to indicate that the config source needs to be synced from the API server
pendingConfigSource chan bool
2018-07-18 14:47:22 +00:00
// configStatus manages the status we report on the Node object
configStatus status . NodeConfigStatus
// nodeInformer is the informer that watches the Node object
nodeInformer cache . SharedInformer
2018-01-09 18:57:14 +00:00
2018-07-18 14:47:22 +00:00
// remoteConfigSourceInformer is the informer that watches the assigned config source
remoteConfigSourceInformer cache . SharedInformer
2018-01-09 18:57:14 +00:00
// checkpointStore persists config source checkpoints to a storage layer
checkpointStore store . Store
}
2018-07-18 14:47:22 +00:00
// NewController constructs a new Controller object and returns it. The dynamicConfigDir
// path must be absolute. transform applies an arbitrary transformation to config after loading, and before validation.
// This can be used, for example, to include config from flags before the controller's validation step.
// If transform returns an error, loadConfig will fail, and an InternalError will be reported.
// Be wary if using this function as an extension point, in most cases the controller should
// probably just be natively extended to do what you need. Injecting flag precedence transformations
// is something of an exception because the caller of this controller (cmd/) is aware of flags, but this
// controller's tree (pkg/) is not.
func NewController ( dynamicConfigDir string , transform TransformFunc ) * Controller {
2018-01-09 18:57:14 +00:00
return & Controller {
2018-07-18 14:47:22 +00:00
transform : transform ,
2018-01-09 18:57:14 +00:00
// channels must have capacity at least 1, since we signal with non-blocking writes
pendingConfigSource : make ( chan bool , 1 ) ,
2018-07-18 14:47:22 +00:00
configStatus : status . NewNodeConfigStatus ( ) ,
checkpointStore : store . NewFsStore ( utilfs . DefaultFs { } , filepath . Join ( dynamicConfigDir , storeDir ) ) ,
2018-03-06 22:33:18 +00:00
}
2018-01-09 18:57:14 +00:00
}
// Bootstrap attempts to return a valid KubeletConfiguration based on the configuration of the Controller,
// or returns an error if no valid configuration could be produced. Bootstrap should be called synchronously before StartSync.
2018-07-18 14:47:22 +00:00
// If the pre-existing local configuration should be used, Bootstrap returns a nil config.
2018-01-09 18:57:14 +00:00
func ( cc * Controller ) Bootstrap ( ) ( * kubeletconfig . KubeletConfiguration , error ) {
utillog . Infof ( "starting controller" )
2018-03-06 22:33:18 +00:00
// ensure the filesystem is initialized
if err := cc . initializeDynamicConfigDir ( ) ; err != nil {
2018-01-09 18:57:14 +00:00
return nil , err
}
2018-07-18 14:47:22 +00:00
// determine assigned source and set status
assignedSource , err := cc . checkpointStore . Assigned ( )
if err != nil {
return nil , err
}
if assignedSource != nil {
cc . configStatus . SetAssigned ( assignedSource . NodeConfigSource ( ) )
}
// determine last-known-good source and set status
lastKnownGoodSource , err := cc . checkpointStore . LastKnownGood ( )
if err != nil {
return nil , err
}
if lastKnownGoodSource != nil {
cc . configStatus . SetLastKnownGood ( lastKnownGoodSource . NodeConfigSource ( ) )
}
// if the assigned source is nil, return nil to indicate local config
if assignedSource == nil {
return nil , nil
}
// attempt to load assigned config
assignedConfig , reason , err := cc . loadConfig ( assignedSource )
2018-03-06 22:33:18 +00:00
if err == nil {
2018-07-18 14:47:22 +00:00
// update the active source to the non-nil assigned source
cc . configStatus . SetActive ( assignedSource . NodeConfigSource ( ) )
2018-03-06 22:33:18 +00:00
// update the last-known-good config if necessary, and start a timer that
// periodically checks whether the last-known good needs to be updated
// we only do this when the assigned config loads and passes validation
// wait.Forever will call the func once before starting the timer
go wait . Forever ( func ( ) { cc . checkTrial ( configTrialDuration ) } , 10 * time . Second )
2018-01-09 18:57:14 +00:00
2018-07-18 14:47:22 +00:00
return assignedConfig , nil
} // Assert: the assigned config failed to load or validate
2018-01-09 18:57:14 +00:00
2018-03-06 22:33:18 +00:00
// TODO(mtaufen): consider re-attempting download when a load/verify/parse/validate
2018-01-09 18:57:14 +00:00
// error happens outside trial period, we already made it past the trial so it's probably filesystem corruption
2018-07-18 14:47:22 +00:00
// or something else scary
2018-01-09 18:57:14 +00:00
2018-03-06 22:33:18 +00:00
// log the reason and error details for the failure to load the assigned config
utillog . Errorf ( fmt . Sprintf ( "%s, error: %v" , reason , err ) )
2018-01-09 18:57:14 +00:00
2018-07-18 14:47:22 +00:00
// set status to indicate the failure with the assigned config
cc . configStatus . SetError ( reason )
// if the last-known-good source is nil, return nil to indicate local config
if lastKnownGoodSource == nil {
return nil , nil
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// attempt to load the last-known-good config
lastKnownGoodConfig , _ , err := cc . loadConfig ( lastKnownGoodSource )
if err != nil {
// we failed to load the last-known-good, so something is really messed up and we just return the error
return nil , err
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// set status to indicate the active source is the non-nil last-known-good source
cc . configStatus . SetActive ( lastKnownGoodSource . NodeConfigSource ( ) )
return lastKnownGoodConfig , nil
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// StartSync tells the controller to start the goroutines that sync status/config to/from the API server.
// The clients must be non-nil, and the nodeName must be non-empty.
func ( cc * Controller ) StartSync ( client clientset . Interface , eventClient v1core . EventsGetter , nodeName string ) error {
const errFmt = "cannot start Kubelet config sync: %s"
2018-01-09 18:57:14 +00:00
if client == nil {
2018-07-18 14:47:22 +00:00
return fmt . Errorf ( errFmt , "nil client" )
}
if eventClient == nil {
return fmt . Errorf ( errFmt , "nil event client" )
}
if nodeName == "" {
return fmt . Errorf ( errFmt , "empty nodeName" )
2018-01-09 18:57:14 +00:00
}
2018-03-06 22:33:18 +00:00
// Rather than use utilruntime.HandleCrash, which doesn't actually crash in the Kubelet,
// we use HandlePanic to manually call the panic handlers and then crash.
// We have a better chance of recovering normal operation if we just restart the Kubelet in the event
// of a Go runtime error.
2018-07-18 14:47:22 +00:00
// NOTE(mtaufen): utilpanic.HandlePanic returns a function and you have to call it for your thing to run!
// This was EVIL to debug (difficult to see missing `()`).
// The code now uses `go name()` instead of `go utilpanic.HandlePanic(func(){...})()` to avoid confusion.
// status sync worker
statusSyncLoopFunc := utilpanic . HandlePanic ( func ( ) {
utillog . Infof ( "starting status sync loop" )
2018-03-06 22:33:18 +00:00
wait . JitterUntil ( func ( ) {
2018-07-18 14:47:22 +00:00
cc . configStatus . Sync ( client , nodeName )
2018-03-06 22:33:18 +00:00
} , 10 * time . Second , 0.2 , true , wait . NeverStop )
2018-07-18 14:47:22 +00:00
} )
// remote config source informer, if we have a remote source to watch
assignedSource , err := cc . checkpointStore . Assigned ( )
2018-03-06 22:33:18 +00:00
if err != nil {
2018-07-18 14:47:22 +00:00
return fmt . Errorf ( errFmt , err )
} else if assignedSource == nil {
utillog . Infof ( "local source is assigned, will not start remote config source informer" )
} else {
cc . remoteConfigSourceInformer = assignedSource . Informer ( client , cache . ResourceEventHandlerFuncs {
AddFunc : cc . onAddRemoteConfigSourceEvent ,
UpdateFunc : cc . onUpdateRemoteConfigSourceEvent ,
DeleteFunc : cc . onDeleteRemoteConfigSourceEvent ,
} ,
)
2018-03-06 22:33:18 +00:00
}
2018-07-18 14:47:22 +00:00
remoteConfigSourceInformerFunc := utilpanic . HandlePanic ( func ( ) {
if cc . remoteConfigSourceInformer != nil {
utillog . Infof ( "starting remote config source informer" )
cc . remoteConfigSourceInformer . Run ( wait . NeverStop )
}
} )
// node informer
cc . nodeInformer = newSharedNodeInformer ( client , nodeName ,
cc . onAddNodeEvent , cc . onUpdateNodeEvent , cc . onDeleteNodeEvent )
nodeInformerFunc := utilpanic . HandlePanic ( func ( ) {
utillog . Infof ( "starting Node informer" )
cc . nodeInformer . Run ( wait . NeverStop )
} )
// config sync worker
configSyncLoopFunc := utilpanic . HandlePanic ( func ( ) {
utillog . Infof ( "starting Kubelet config sync loop" )
wait . JitterUntil ( func ( ) {
cc . syncConfigSource ( client , eventClient , nodeName )
} , 10 * time . Second , 0.2 , true , wait . NeverStop )
} )
go statusSyncLoopFunc ( )
go remoteConfigSourceInformerFunc ( )
go nodeInformerFunc ( )
go configSyncLoopFunc ( )
return nil
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// loadConfig loads Kubelet config from a checkpoint
// It returns the loaded configuration or a clean failure reason (for status reporting) and an error.
func ( cc * Controller ) loadConfig ( source checkpoint . RemoteConfigSource ) ( * kubeletconfig . KubeletConfiguration , string , error ) {
// load KubeletConfiguration from checkpoint
kc , err := cc . checkpointStore . Load ( source )
2018-03-06 22:33:18 +00:00
if err != nil {
2018-07-18 14:47:22 +00:00
return nil , status . LoadError , err
2018-03-06 22:33:18 +00:00
}
2018-07-18 14:47:22 +00:00
// apply any required transformations to the KubeletConfiguration
if cc . transform != nil {
if err := cc . transform ( kc ) ; err != nil {
return nil , status . InternalError , err
}
2018-03-06 22:33:18 +00:00
}
2018-07-18 14:47:22 +00:00
// validate the result
if err := validation . ValidateKubeletConfiguration ( kc ) ; err != nil {
return nil , status . ValidateError , err
2018-03-06 22:33:18 +00:00
}
2018-07-18 14:47:22 +00:00
return kc , "" , nil
2018-03-06 22:33:18 +00:00
}
// initializeDynamicConfigDir makes sure that the storage layers for various controller components are set up correctly
func ( cc * Controller ) initializeDynamicConfigDir ( ) error {
2018-01-09 18:57:14 +00:00
utillog . Infof ( "ensuring filesystem is set up correctly" )
2018-03-06 22:33:18 +00:00
// initializeDynamicConfigDir local checkpoint storage location
2018-01-09 18:57:14 +00:00
return cc . checkpointStore . Initialize ( )
}
2018-03-06 22:33:18 +00:00
// checkTrial checks whether the trial duration has passed, and updates the last-known-good config if necessary
func ( cc * Controller ) checkTrial ( duration time . Duration ) {
// when the trial period is over, the assigned config becomes the last-known-good
if trial , err := cc . inTrial ( duration ) ; err != nil {
utillog . Errorf ( "failed to check trial period for assigned config, error: %v" , err )
} else if ! trial {
if err := cc . graduateAssignedToLastKnownGood ( ) ; err != nil {
utillog . Errorf ( "failed to set last-known-good to assigned config, error: %v" , err )
}
2018-01-09 18:57:14 +00:00
}
}
2018-07-18 14:47:22 +00:00
// inTrial returns true if the time elapsed since the last modification of the assigned config does not exceed `trialDur`, false otherwise
2018-01-09 18:57:14 +00:00
func ( cc * Controller ) inTrial ( trialDur time . Duration ) ( bool , error ) {
now := time . Now ( )
2018-07-18 14:47:22 +00:00
t , err := cc . checkpointStore . AssignedModified ( )
2018-01-09 18:57:14 +00:00
if err != nil {
return false , err
}
if now . Sub ( t ) <= trialDur {
return true , nil
}
return false , nil
}
2018-07-18 14:47:22 +00:00
// graduateAssignedToLastKnownGood sets the last-known-good in the checkpointStore
// to the same value as the assigned config maintained by the checkpointStore
2018-03-06 22:33:18 +00:00
func ( cc * Controller ) graduateAssignedToLastKnownGood ( ) error {
2018-07-18 14:47:22 +00:00
// get assigned
assigned , err := cc . checkpointStore . Assigned ( )
2018-01-09 18:57:14 +00:00
if err != nil {
2018-03-06 22:33:18 +00:00
return err
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// get last-known-good
lastKnownGood , err := cc . checkpointStore . LastKnownGood ( )
if err != nil {
return err
}
// if the sources are equal, no need to change
if assigned == lastKnownGood ||
2018-11-26 18:23:56 +00:00
assigned != nil && lastKnownGood != nil && apiequality . Semantic . DeepEqual ( assigned . NodeConfigSource ( ) , lastKnownGood . NodeConfigSource ( ) ) {
2018-07-18 14:47:22 +00:00
return nil
}
// update last-known-good
err = cc . checkpointStore . SetLastKnownGood ( assigned )
2018-01-09 18:57:14 +00:00
if err != nil {
2018-03-06 22:33:18 +00:00
return err
2018-01-09 18:57:14 +00:00
}
2018-07-18 14:47:22 +00:00
// update the status to reflect the new last-known-good config
cc . configStatus . SetLastKnownGood ( assigned . NodeConfigSource ( ) )
utillog . Infof ( "updated last-known-good config to %s, UID: %s, ResourceVersion: %s" , assigned . APIPath ( ) , assigned . UID ( ) , assigned . ResourceVersion ( ) )
2018-01-09 18:57:14 +00:00
return nil
}