rebase: update all k8s packages to 0.27.2

Signed-off-by: Niels de Vos <ndevos@ibm.com>
Niels de Vos
2023-06-01 18:58:10 +02:00
committed by mergify[bot]
parent 07b05616a0
commit 2551a0b05f
618 changed files with 42944 additions and 16168 deletions


@ -34,6 +34,7 @@ import (
jsonpatch "github.com/evanphx/json-patch"
"github.com/google/uuid"
"golang.org/x/crypto/cryptobyte"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@ -126,6 +127,7 @@ type Config struct {
EnableIndex bool
EnableProfiling bool
DebugSocketPath string
EnableDiscovery bool
// Requires generic profiling enabled
@ -155,8 +157,14 @@ type Config struct {
// BuildHandlerChainFunc allows you to build custom handler chains by decorating the apiHandler.
BuildHandlerChainFunc func(apiHandler http.Handler, c *Config) (secure http.Handler)
// HandlerChainWaitGroup allows you to wait for all chain handlers exit after the server shutdown.
HandlerChainWaitGroup *utilwaitgroup.SafeWaitGroup
// NonLongRunningRequestWaitGroup allows you to wait for all chain
// handlers associated with non long-running requests
// to complete while the server is shutting down.
NonLongRunningRequestWaitGroup *utilwaitgroup.SafeWaitGroup
// WatchRequestWaitGroup allows us to wait for all chain
// handlers associated with active watch requests to
// complete while the server is shutting down.
WatchRequestWaitGroup *utilwaitgroup.RateLimitedSafeWaitGroup
// DiscoveryAddresses is used to build the IPs pass to discovery. If nil, the ExternalAddress is
// always reported
DiscoveryAddresses discovery.Addresses
@ -268,6 +276,23 @@ type Config struct {
// AggregatedDiscoveryGroupManager serves /apis in an aggregated form.
AggregatedDiscoveryGroupManager discoveryendpoint.ResourceManager
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain; it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain; this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
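The two fields above (together with DebugSocketPath earlier in this struct) are opt-in and default to their zero values. A minimal sketch of wiring them up in an embedding server, assuming the usual k8s.io/apiserver import path; the socket path and grace period below are illustrative values, not defaults introduced by this change:

package example

import (
	"time"

	"k8s.io/apimachinery/pkg/runtime/serializer"
	genericapiserver "k8s.io/apiserver/pkg/server"
)

// buildConfig opts in to the new debug socket and bounded watch draining.
func buildConfig(codecs serializer.CodecFactory) *genericapiserver.Config {
	cfg := genericapiserver.NewConfig(codecs)
	// Serve pprof and log-level endpoints on an unauthenticated unix socket (illustrative path).
	cfg.DebugSocketPath = "/var/run/example-apiserver/debug.sock"
	// Wait at most this long for active watch requests to drain during shutdown (illustrative value).
	cfg.ShutdownWatchTerminationGracePeriod = 30 * time.Second
	return cfg
}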
type RecommendedConfig struct {
@ -320,6 +345,8 @@ type AuthenticationInfo struct {
APIAudiences authenticator.Audiences
// Authenticator determines which subject is making the request
Authenticator authenticator.Request
RequestHeaderConfig *authenticatorfactory.RequestHeaderConfig
}
type AuthorizationInfo struct {
@ -342,31 +369,49 @@ func NewConfig(codecs serializer.CodecFactory) *Config {
klog.Fatalf("error getting hostname for apiserver identity: %v", err)
}
hash := sha256.Sum256([]byte(hostname))
id = "kube-apiserver-" + strings.ToLower(base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString(hash[:16]))
// Since the hash needs to be unique across each kube-apiserver and aggregated apiservers,
// the hash used for the identity should include both the hostname and the identity value.
// TODO: receive the identity value as a parameter once the apiserver identity lease controller
// post start hook is moved to generic apiserver.
b := cryptobyte.NewBuilder(nil)
b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
b.AddBytes([]byte(hostname))
})
b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
b.AddBytes([]byte("kube-apiserver"))
})
hashData, err := b.Bytes()
if err != nil {
klog.Fatalf("error building hash data for apiserver identity: %v", err)
}
hash := sha256.Sum256(hashData)
id = "apiserver-" + strings.ToLower(base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString(hash[:16]))
}
lifecycleSignals := newLifecycleSignals()
return &Config{
Serializer: codecs,
BuildHandlerChainFunc: DefaultBuildHandlerChain,
HandlerChainWaitGroup: new(utilwaitgroup.SafeWaitGroup),
LegacyAPIGroupPrefixes: sets.NewString(DefaultLegacyAPIPrefix),
DisabledPostStartHooks: sets.NewString(),
PostStartHooks: map[string]PostStartHookConfigEntry{},
HealthzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
ReadyzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
LivezChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
EnableIndex: true,
EnableDiscovery: true,
EnableProfiling: true,
EnableMetrics: true,
MaxRequestsInFlight: 400,
MaxMutatingRequestsInFlight: 200,
RequestTimeout: time.Duration(60) * time.Second,
MinRequestTimeout: 1800,
LivezGracePeriod: time.Duration(0),
ShutdownDelayDuration: time.Duration(0),
Serializer: codecs,
BuildHandlerChainFunc: DefaultBuildHandlerChain,
NonLongRunningRequestWaitGroup: new(utilwaitgroup.SafeWaitGroup),
WatchRequestWaitGroup: &utilwaitgroup.RateLimitedSafeWaitGroup{},
LegacyAPIGroupPrefixes: sets.NewString(DefaultLegacyAPIPrefix),
DisabledPostStartHooks: sets.NewString(),
PostStartHooks: map[string]PostStartHookConfigEntry{},
HealthzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
ReadyzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
LivezChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...),
EnableIndex: true,
EnableDiscovery: true,
EnableProfiling: true,
DebugSocketPath: "",
EnableMetrics: true,
MaxRequestsInFlight: 400,
MaxMutatingRequestsInFlight: 200,
RequestTimeout: time.Duration(60) * time.Second,
MinRequestTimeout: 1800,
LivezGracePeriod: time.Duration(0),
ShutdownDelayDuration: time.Duration(0),
// 1.5MB is the default client request size in bytes
// the etcd server should accept. See
// https://github.com/etcd-io/etcd/blob/release-3.4/embed/config.go#L56.
@ -387,9 +432,10 @@ func NewConfig(codecs serializer.CodecFactory) *Config {
// Default to treating watch as a long-running operation
// Generic API servers have no inherent long-running subresources
LongRunningFunc: genericfilters.BasicLongRunningRequestCheck(sets.NewString("watch"), sets.NewString()),
lifecycleSignals: lifecycleSignals,
StorageObjectCountTracker: flowcontrolrequest.NewStorageObjectCountTracker(),
LongRunningFunc: genericfilters.BasicLongRunningRequestCheck(sets.NewString("watch"), sets.NewString()),
lifecycleSignals: lifecycleSignals,
StorageObjectCountTracker: flowcontrolrequest.NewStorageObjectCountTracker(),
ShutdownWatchTerminationGracePeriod: time.Duration(0),
APIServerID: id,
StorageVersionManager: storageversion.NewDefaultManager(),
@ -631,28 +677,36 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G
return c.BuildHandlerChainFunc(handler, c.Config)
}
var debugSocket *routes.DebugSocket
if c.DebugSocketPath != "" {
debugSocket = routes.NewDebugSocket(c.DebugSocketPath)
}
apiServerHandler := NewAPIServerHandler(name, c.Serializer, handlerChainBuilder, delegationTarget.UnprotectedHandler())
s := &GenericAPIServer{
discoveryAddresses: c.DiscoveryAddresses,
LoopbackClientConfig: c.LoopbackClientConfig,
legacyAPIGroupPrefixes: c.LegacyAPIGroupPrefixes,
admissionControl: c.AdmissionControl,
Serializer: c.Serializer,
AuditBackend: c.AuditBackend,
Authorizer: c.Authorization.Authorizer,
delegationTarget: delegationTarget,
EquivalentResourceRegistry: c.EquivalentResourceRegistry,
HandlerChainWaitGroup: c.HandlerChainWaitGroup,
Handler: apiServerHandler,
discoveryAddresses: c.DiscoveryAddresses,
LoopbackClientConfig: c.LoopbackClientConfig,
legacyAPIGroupPrefixes: c.LegacyAPIGroupPrefixes,
admissionControl: c.AdmissionControl,
Serializer: c.Serializer,
AuditBackend: c.AuditBackend,
Authorizer: c.Authorization.Authorizer,
delegationTarget: delegationTarget,
EquivalentResourceRegistry: c.EquivalentResourceRegistry,
NonLongRunningRequestWaitGroup: c.NonLongRunningRequestWaitGroup,
WatchRequestWaitGroup: c.WatchRequestWaitGroup,
Handler: apiServerHandler,
UnprotectedDebugSocket: debugSocket,
listedPathProvider: apiServerHandler,
minRequestTimeout: time.Duration(c.MinRequestTimeout) * time.Second,
ShutdownTimeout: c.RequestTimeout,
ShutdownDelayDuration: c.ShutdownDelayDuration,
SecureServingInfo: c.SecureServing,
ExternalAddress: c.ExternalAddress,
minRequestTimeout: time.Duration(c.MinRequestTimeout) * time.Second,
ShutdownTimeout: c.RequestTimeout,
ShutdownDelayDuration: c.ShutdownDelayDuration,
ShutdownWatchTerminationGracePeriod: c.ShutdownWatchTerminationGracePeriod,
SecureServingInfo: c.SecureServing,
ExternalAddress: c.ExternalAddress,
openAPIConfig: c.OpenAPIConfig,
openAPIV3Config: c.OpenAPIV3Config,
@ -686,10 +740,10 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.AggregatedDiscoveryEndpoint) {
manager := c.AggregatedDiscoveryGroupManager
if manager == nil {
manager = discoveryendpoint.NewResourceManager()
manager = discoveryendpoint.NewResourceManager("apis")
}
s.AggregatedDiscoveryGroupManager = manager
s.AggregatedLegacyDiscoveryGroupManager = discoveryendpoint.NewResourceManager()
s.AggregatedLegacyDiscoveryGroupManager = discoveryendpoint.NewResourceManager("api")
}
for {
if c.JSONPatchMaxCopyBytes <= 0 {
@ -868,7 +922,7 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler {
failedHandler = filterlatency.TrackCompleted(failedHandler)
handler = filterlatency.TrackCompleted(handler)
handler = genericapifilters.WithAuthentication(handler, c.Authentication.Authenticator, failedHandler, c.Authentication.APIAudiences)
handler = genericapifilters.WithAuthentication(handler, c.Authentication.Authenticator, failedHandler, c.Authentication.APIAudiences, c.Authentication.RequestHeaderConfig)
handler = filterlatency.TrackStarted(handler, c.TracerProvider, "authentication")
handler = genericfilters.WithCORS(handler, c.CorsAllowedOriginList, nil, nil, nil, "true")
@ -879,7 +933,10 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler {
handler = genericapifilters.WithRequestDeadline(handler, c.AuditBackend, c.AuditPolicyRuleEvaluator,
c.LongRunningFunc, c.Serializer, c.RequestTimeout)
handler = genericfilters.WithWaitGroup(handler, c.LongRunningFunc, c.HandlerChainWaitGroup)
handler = genericfilters.WithWaitGroup(handler, c.LongRunningFunc, c.NonLongRunningRequestWaitGroup)
if c.ShutdownWatchTerminationGracePeriod > 0 {
handler = genericfilters.WithWatchTerminationDuringShutdown(handler, c.lifecycleSignals, c.WatchRequestWaitGroup)
}
if c.SecureServing != nil && !c.SecureServing.DisableHTTP2 && c.GoawayChance > 0 {
handler = genericfilters.WithProbabilisticGoaway(handler, c.GoawayChance)
}
@ -914,6 +971,13 @@ func installAPI(s *GenericAPIServer, c *Config) {
// so far, only logging related endpoints are considered valid to add for these debug flags.
routes.DebugFlags{}.Install(s.Handler.NonGoRestfulMux, "v", routes.StringFlagPutHandler(logs.GlogSetter))
}
if s.UnprotectedDebugSocket != nil {
s.UnprotectedDebugSocket.InstallProfiling()
s.UnprotectedDebugSocket.InstallDebugFlag("v", routes.StringFlagPutHandler(logs.GlogSetter))
if c.EnableContentionProfiling {
goruntime.SetBlockProfileRate(1)
}
}
if c.EnableMetrics {
if c.EnableProfiling {


@ -38,44 +38,76 @@ func WithCORS(handler http.Handler, allowedOriginPatterns []string, allowedMetho
return handler
}
allowedOriginPatternsREs := allowedOriginRegexps(allowedOriginPatterns)
// Set defaults for methods and headers if nothing was passed
if allowedMethods == nil {
allowedMethods = []string{"POST", "GET", "OPTIONS", "PUT", "DELETE", "PATCH"}
}
allowMethodsResponseHeader := strings.Join(allowedMethods, ", ")
if allowedHeaders == nil {
allowedHeaders = []string{"Content-Type", "Content-Length", "Accept-Encoding", "X-CSRF-Token", "Authorization", "X-Requested-With", "If-Modified-Since"}
}
allowHeadersResponseHeader := strings.Join(allowedHeaders, ", ")
if exposedHeaders == nil {
exposedHeaders = []string{"Date"}
}
exposeHeadersResponseHeader := strings.Join(exposedHeaders, ", ")
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
origin := req.Header.Get("Origin")
if origin != "" {
allowed := false
for _, re := range allowedOriginPatternsREs {
if allowed = re.MatchString(origin); allowed {
break
}
}
if allowed {
w.Header().Set("Access-Control-Allow-Origin", origin)
// Set defaults for methods and headers if nothing was passed
if allowedMethods == nil {
allowedMethods = []string{"POST", "GET", "OPTIONS", "PUT", "DELETE", "PATCH"}
}
if allowedHeaders == nil {
allowedHeaders = []string{"Content-Type", "Content-Length", "Accept-Encoding", "X-CSRF-Token", "Authorization", "X-Requested-With", "If-Modified-Since"}
}
if exposedHeaders == nil {
exposedHeaders = []string{"Date"}
}
w.Header().Set("Access-Control-Allow-Methods", strings.Join(allowedMethods, ", "))
w.Header().Set("Access-Control-Allow-Headers", strings.Join(allowedHeaders, ", "))
w.Header().Set("Access-Control-Expose-Headers", strings.Join(exposedHeaders, ", "))
w.Header().Set("Access-Control-Allow-Credentials", allowCredentials)
// Stop here if it's a preflight OPTIONS request
if req.Method == "OPTIONS" {
w.WriteHeader(http.StatusNoContent)
return
}
}
if origin == "" {
handler.ServeHTTP(w, req)
return
}
if !isOriginAllowed(origin, allowedOriginPatternsREs) {
handler.ServeHTTP(w, req)
return
}
w.Header().Set("Access-Control-Allow-Origin", origin)
w.Header().Set("Access-Control-Allow-Methods", allowMethodsResponseHeader)
w.Header().Set("Access-Control-Allow-Headers", allowHeadersResponseHeader)
w.Header().Set("Access-Control-Expose-Headers", exposeHeadersResponseHeader)
w.Header().Set("Access-Control-Allow-Credentials", allowCredentials)
// Stop here if it's a preflight OPTIONS request
if req.Method == "OPTIONS" {
w.WriteHeader(http.StatusNoContent)
return
}
// Dispatch to the next handler
handler.ServeHTTP(w, req)
})
}
// isOriginAllowed returns true if the given Origin header in the
// request is allowed for CORS.
//
// From https://www.rfc-editor.org/rfc/rfc6454#page-13
//
// a) The origin header can contain host and/or port
// serialized-origin = scheme "://" host [ ":" port ]
//
// b) In some cases, a number of origins contribute to causing the user
// agents to issue an HTTP request. In those cases, the user agent MAY
// list all the origins in the Origin header field. For example, if the
// HTTP request was initially issued by one origin but then later
// redirected by another origin, the user agent MAY inform the server
// that two origins were involved in causing the user agent to issue the
// request
// origin-list = serialized-origin *( SP serialized-origin )
func isOriginAllowed(originHeader string, allowedOriginPatternsREs []*regexp.Regexp) bool {
for _, re := range allowedOriginPatternsREs {
if re.MatchString(originHeader) {
return true
}
}
return false
}
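For illustration only, a within-package sketch of how the matching above treats the serialized-origin forms described in RFC 6454; the pattern and origins are made-up values, and fmt is assumed to be imported:

func exampleOriginCheck() {
	res := allowedOriginRegexps([]string{`^https://([a-z0-9-]+\.)?example\.com(:\d+)?$`})
	fmt.Println(isOriginAllowed("https://ui.example.com", res))      // true: scheme "://" host
	fmt.Println(isOriginAllowed("https://ui.example.com:8443", res)) // true: optional port
	fmt.Println(isOriginAllowed("https://evil.example.net", res))    // false: host does not match
}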
func allowedOriginRegexps(allowedOrigins []string) []*regexp.Regexp {
res, err := compileRegexps(allowedOrigins)
if err != nil {


@ -24,20 +24,34 @@ import (
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
utilwaitgroup "k8s.io/apimachinery/pkg/util/waitgroup"
"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/client-go/kubernetes/scheme"
)
// RequestWaitGroup helps with the accounting of request(s) that are in
// flight: the caller is expected to invoke Add(1) before executing the
// request handler and then invoke Done() when the handler finishes.
// NOTE: implementations must ensure that it is thread-safe
// when invoked from multiple goroutines.
type RequestWaitGroup interface {
// Add adds delta, which may be negative, similar to sync.WaitGroup.
// If Add with a positive delta happens after Wait, it will return an error,
// which prevents an unsafe Add.
Add(delta int) error
// Done decrements the WaitGroup counter.
Done()
}
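A minimal sketch of the contract described above, as a filter would use it (the wrapper name withTracking is hypothetical; the Retry-After helper is the one defined later in this file): register the request before handing off, release it when the handler returns, and reject with a retryable response once the server is draining.

func withTracking(handler http.Handler, wg RequestWaitGroup) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		if err := wg.Add(1); err != nil {
			// Add fails once Wait has been called, i.e. the server is already shutting down.
			waitGroupWriteRetryAfterToResponse(w)
			return
		}
		defer wg.Done()
		handler.ServeHTTP(w, req)
	})
}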
// WithWaitGroup adds all non long-running requests to wait group, which is used for graceful shutdown.
func WithWaitGroup(handler http.Handler, longRunning apirequest.LongRunningRequestCheck, wg *utilwaitgroup.SafeWaitGroup) http.Handler {
func WithWaitGroup(handler http.Handler, longRunning apirequest.LongRunningRequestCheck, wg RequestWaitGroup) http.Handler {
// NOTE: both WithWaitGroup and WithRetryAfter must use the exact same isRequestExemptFunc 'isRequestExemptFromRetryAfter',
// otherwise SafeWaitGroup might wait indefinitely and will prevent the server from shutting down gracefully.
return withWaitGroup(handler, longRunning, wg, isRequestExemptFromRetryAfter)
}
func withWaitGroup(handler http.Handler, longRunning apirequest.LongRunningRequestCheck, wg *utilwaitgroup.SafeWaitGroup, isRequestExemptFn isRequestExemptFunc) http.Handler {
func withWaitGroup(handler http.Handler, longRunning apirequest.LongRunningRequestCheck, wg RequestWaitGroup, isRequestExemptFn isRequestExemptFunc) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
ctx := req.Context()
requestInfo, ok := apirequest.RequestInfoFrom(ctx)
@ -64,12 +78,7 @@ func withWaitGroup(handler http.Handler, longRunning apirequest.LongRunningReque
// When apiserver is shutting down, signal clients to retry
// There is a good chance the client hit a different server, so a tight retry is good for client responsiveness.
w.Header().Add("Retry-After", "1")
w.Header().Set("Content-Type", runtime.ContentTypeJSON)
w.Header().Set("X-Content-Type-Options", "nosniff")
statusErr := apierrors.NewServiceUnavailable("apiserver is shutting down").Status()
w.WriteHeader(int(statusErr.Code))
fmt.Fprintln(w, runtime.EncodeOrDie(scheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), &statusErr))
waitGroupWriteRetryAfterToResponse(w)
return
}
@ -77,3 +86,12 @@ func withWaitGroup(handler http.Handler, longRunning apirequest.LongRunningReque
handler.ServeHTTP(w, req)
})
}
func waitGroupWriteRetryAfterToResponse(w http.ResponseWriter) {
w.Header().Add("Retry-After", "1")
w.Header().Set("Content-Type", runtime.ContentTypeJSON)
w.Header().Set("X-Content-Type-Options", "nosniff")
statusErr := apierrors.NewServiceUnavailable("apiserver is shutting down").Status()
w.WriteHeader(int(statusErr.Code))
fmt.Fprintln(w, runtime.EncodeOrDie(scheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), &statusErr))
}


@ -0,0 +1,62 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"errors"
"net/http"
"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/klog/v2"
)
func WithWatchTerminationDuringShutdown(handler http.Handler, termination apirequest.ServerShutdownSignal, wg RequestWaitGroup) http.Handler {
if termination == nil || wg == nil {
klog.Warningf("watch termination during shutdown not attached to the handler chain")
return handler
}
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
ctx := req.Context()
requestInfo, ok := apirequest.RequestInfoFrom(ctx)
if !ok {
// if this happens, the handler chain isn't set up correctly because there is no request info
responsewriters.InternalError(w, req, errors.New("no RequestInfo found in the context"))
return
}
if !watchVerbs.Has(requestInfo.Verb) {
handler.ServeHTTP(w, req)
return
}
if err := wg.Add(1); err != nil {
// When apiserver is shutting down, signal clients to retry
// There is a good chance the client hit a different server, so a tight retry is good for client responsiveness.
waitGroupWriteRetryAfterToResponse(w)
return
}
// attach ServerShutdownSignal to the watch request so that the
// watch handler loop can return as soon as the server signals
// that it is shutting down.
ctx = apirequest.WithServerShutdownSignal(req.Context(), termination)
req = req.WithContext(ctx)
defer wg.Done()
handler.ServeHTTP(w, req)
})
}


@ -55,7 +55,7 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve
return
}
http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError)
klog.ErrorS(nil, "apiserver panic'd", "method", req.Method, "URI", req.RequestURI, "audit-ID", audit.GetAuditIDTruncated(req.Context()))
klog.ErrorS(nil, "apiserver panic'd", "method", req.Method, "URI", req.RequestURI, "auditID", audit.GetAuditIDTruncated(req.Context()))
})
}


@ -17,6 +17,7 @@ limitations under the License.
package server
import (
"context"
"fmt"
"net/http"
gpath "path"
@ -26,12 +27,15 @@ import (
systemd "github.com/coreos/go-systemd/v22/daemon"
"golang.org/x/time/rate"
apidiscoveryv2beta1 "k8s.io/api/apidiscovery/v2beta1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/util/managedfields"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilwaitgroup "k8s.io/apimachinery/pkg/util/waitgroup"
"k8s.io/apimachinery/pkg/version"
@ -41,22 +45,19 @@ import (
genericapi "k8s.io/apiserver/pkg/endpoints"
"k8s.io/apiserver/pkg/endpoints/discovery"
discoveryendpoint "k8s.io/apiserver/pkg/endpoints/discovery/aggregated"
"k8s.io/apiserver/pkg/endpoints/handlers/fieldmanager"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/registry/rest"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/server/routes"
"k8s.io/apiserver/pkg/storageversion"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilopenapi "k8s.io/apiserver/pkg/util/openapi"
restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2"
openapibuilder2 "k8s.io/kube-openapi/pkg/builder"
openapibuilder3 "k8s.io/kube-openapi/pkg/builder3"
openapicommon "k8s.io/kube-openapi/pkg/common"
"k8s.io/kube-openapi/pkg/handler"
"k8s.io/kube-openapi/pkg/handler3"
openapiutil "k8s.io/kube-openapi/pkg/util"
openapiproto "k8s.io/kube-openapi/pkg/util/proto"
"k8s.io/kube-openapi/pkg/validation/spec"
"k8s.io/utils/clock"
)
@ -88,7 +89,7 @@ type APIGroupInfo struct {
// StaticOpenAPISpec is the spec derived from the definitions of all resources installed together.
// It is set during InstallAPIGroups, InstallAPIGroup, and InstallLegacyAPIGroup.
StaticOpenAPISpec *spec.Swagger
StaticOpenAPISpec map[string]*spec.Schema
}
func (a *APIGroupInfo) destroyStorage() {
@ -136,6 +137,10 @@ type GenericAPIServer struct {
// Handler holds the handlers being used by this API server
Handler *APIServerHandler
// UnprotectedDebugSocket is used to serve pprof information in a unix-domain socket. This socket is
// not protected by authentication/authorization.
UnprotectedDebugSocket *routes.DebugSocket
// listedPathProvider is a lister which provides the set of paths to show at /
listedPathProvider routes.ListedPathProvider
@ -214,8 +219,14 @@ type GenericAPIServer struct {
// delegationTarget is the next delegate in the chain. This is never nil.
delegationTarget DelegationTarget
// HandlerChainWaitGroup allows you to wait for all chain handlers finish after the server shutdown.
HandlerChainWaitGroup *utilwaitgroup.SafeWaitGroup
// NonLongRunningRequestWaitGroup allows you to wait for all chain
// handlers associated with non long-running requests
// to complete while the server is shutting down.
NonLongRunningRequestWaitGroup *utilwaitgroup.SafeWaitGroup
// WatchRequestWaitGroup allows us to wait for all chain
// handlers associated with active watch requests to
// complete while the server is shutting down.
WatchRequestWaitGroup *utilwaitgroup.RateLimitedSafeWaitGroup
// ShutdownDelayDuration allows blocking shutdown for some time, e.g. until endpoints pointing to this API server
// have converged on all nodes. During this time, the API server keeps serving, /healthz will return 200,
@ -255,6 +266,23 @@ type GenericAPIServer struct {
// If enabled, after ShutdownDelayDuration elapses, any incoming request is
// rejected with a 429 status code and a 'Retry-After' response.
ShutdownSendRetryAfter bool
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain; it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain; this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
// DelegationTarget is an interface which allows for composition of API servers with top level handling that works
@ -442,23 +470,27 @@ func (s *GenericAPIServer) PrepareRun() preparedGenericAPIServer {
// | NotAcceptingNewRequest (notAcceptingNewRequestCh)
// | |
// | |
// | |---------------------------------------------------------|
// | | | | |
// | [without [with | |
// | ShutdownSendRetryAfter] ShutdownSendRetryAfter] | |
// | | | | |
// | | ---------------| |
// | | | |
// | | (HandlerChainWaitGroup::Wait) |
// | | | |
// | | InFlightRequestsDrained (drainedCh) |
// | | | |
// | ----------------------------------------|-----------------|
// | | |
// | |----------------------------------------------------------------------------------|
// | | | | |
// | [without [with | |
// | ShutdownSendRetryAfter] ShutdownSendRetryAfter] | |
// | | | | |
// | | ---------------| |
// | | | |
// | | |----------------|-----------------------| |
// | | | | |
// | | (NonLongRunningRequestWaitGroup::Wait) (WatchRequestWaitGroup::Wait) |
// | | | | |
// | | |------------------|---------------------| |
// | | | |
// | | InFlightRequestsDrained (drainedCh) |
// | | | |
// | |-------------------|---------------------|----------------------------------------|
// | | |
// | stopHttpServerCh (AuditBackend::Shutdown())
// | |
// | |
// | listenerStoppedCh
// | |
// | |
// | HTTPServerStoppedListening (httpServerStoppedListeningCh)
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
delayedStopCh := s.lifecycleSignals.AfterShutdownDelayDuration
@ -467,6 +499,14 @@ func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
// Clean up resources on shutdown.
defer s.Destroy()
// If UDS profiling is enabled, start a local http server listening on that socket
if s.UnprotectedDebugSocket != nil {
go func() {
defer utilruntime.HandleCrash()
klog.Error(s.UnprotectedDebugSocket.Run(stopCh))
}()
}
// spawn a new goroutine for closing the MuxAndDiscoveryComplete signal
// registration happens during construction of the generic api server
// the last server in the chain aggregates signals from the previous instances
@ -509,7 +549,7 @@ func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
// net/http waits for 1s for the peer to respond to a GO_AWAY frame, so
// we should wait for a minimum of 2s
shutdownTimeout = 2 * time.Second
klog.V(1).InfoS("[graceful-termination] using HTTP Server shutdown timeout", "ShutdownTimeout", shutdownTimeout)
klog.V(1).InfoS("[graceful-termination] using HTTP Server shutdown timeout", "shutdownTimeout", shutdownTimeout)
}
notAcceptingNewRequestCh := s.lifecycleSignals.NotAcceptingNewRequest
@ -563,15 +603,17 @@ func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
<-preShutdownHooksHasStoppedCh.Signaled()
}()
// wait for all in-flight non-long running requests to finish
nonLongRunningRequestDrainedCh := make(chan struct{})
go func() {
defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", drainedCh.Name())
defer drainedCh.Signal()
defer close(nonLongRunningRequestDrainedCh)
defer klog.V(1).Info("[graceful-termination] in-flight non long-running request(s) have drained")
// wait for the delayed stopCh before closing the handler chain (it rejects everything after Wait has been called).
<-notAcceptingNewRequestCh.Signaled()
// Wait for all requests to finish, which are bounded by the RequestTimeout variable.
// once HandlerChainWaitGroup.Wait is invoked, the apiserver is
// once NonLongRunningRequestWaitGroup.Wait is invoked, the apiserver is
// expected to reject any incoming request with a {503, Retry-After}
// response via the WithWaitGroup filter. On the contrary, we observe
// that incoming request(s) get a 'connection refused' error, this is
@ -583,7 +625,48 @@ func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
// 'Server.Shutdown' will be invoked only after in-flight requests
// have been drained.
// TODO: can we consolidate these two modes of graceful termination?
s.HandlerChainWaitGroup.Wait()
s.NonLongRunningRequestWaitGroup.Wait()
}()
// wait for all in-flight watches to finish
activeWatchesDrainedCh := make(chan struct{})
go func() {
defer close(activeWatchesDrainedCh)
<-notAcceptingNewRequestCh.Signaled()
if s.ShutdownWatchTerminationGracePeriod <= time.Duration(0) {
klog.V(1).InfoS("[graceful-termination] not going to wait for active watch request(s) to drain")
return
}
// Wait for all active watches to finish
grace := s.ShutdownWatchTerminationGracePeriod
activeBefore, activeAfter, err := s.WatchRequestWaitGroup.Wait(func(count int) (utilwaitgroup.RateLimiter, context.Context, context.CancelFunc) {
qps := float64(count) / grace.Seconds()
// TODO: we don't want the QPS (max requests drained per second) to
// get below a certain floor value, since we want the server to
// drain the active watch requests as soon as possible.
// For now, it's hard coded to 200, and it is subject to change
// based on the result from the scale testing.
if qps < 200 {
qps = 200
}
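// Worked example (illustrative numbers): with 10,000 active watches and a
// 30s grace period, count/grace is ~333 watches/s, which is above the 200
// floor, so draining takes roughly the full grace period. With 3,000
// watches and the same grace period, the 200/s floor applies and the
// active watches drain in about 15s.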
ctx, cancel := context.WithTimeout(context.Background(), grace)
// We don't expect more than one token to be consumed
// in a single Wait call, so setting burst to 1.
return rate.NewLimiter(rate.Limit(qps), 1), ctx, cancel
})
klog.V(1).InfoS("[graceful-termination] active watch request(s) have drained",
"duration", grace, "activeWatchesBefore", activeBefore, "activeWatchesAfter", activeAfter, "error", err)
}()
go func() {
defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", drainedCh.Name())
defer drainedCh.Signal()
<-nonLongRunningRequestDrainedCh
<-activeWatchesDrainedCh
}()
klog.V(1).Info("[graceful-termination] waiting for shutdown to be initiated")
@ -653,7 +736,16 @@ func (s preparedGenericAPIServer) NonBlockingRun(stopCh <-chan struct{}, shutdow
}
// installAPIResources is a private method for installing the REST storage backing each api groupversionresource
func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *APIGroupInfo, openAPIModels openapiproto.Models) error {
func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *APIGroupInfo, openAPIModels map[string]*spec.Schema) error {
var typeConverter managedfields.TypeConverter
if len(openAPIModels) > 0 {
var err error
typeConverter, err = managedfields.NewTypeConverter(openAPIModels, false)
if err != nil {
return err
}
}
var resourceInfos []*storageversion.ResourceInfo
for _, groupVersion := range apiGroupInfo.PrioritizedVersions {
if len(apiGroupInfo.VersionedResourcesStorageMap[groupVersion.Version]) == 0 {
@ -668,16 +760,7 @@ func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *A
if apiGroupInfo.OptionsExternalVersion != nil {
apiGroupVersion.OptionsExternalVersion = apiGroupInfo.OptionsExternalVersion
}
apiGroupVersion.OpenAPIModels = openAPIModels
if openAPIModels != nil {
typeConverter, err := fieldmanager.NewTypeConverter(openAPIModels, false)
if err != nil {
return err
}
apiGroupVersion.TypeConverter = typeConverter
}
apiGroupVersion.TypeConverter = typeConverter
apiGroupVersion.MaxRequestBodyBytes = s.maxRequestBodyBytes
discoveryAPIResources, r, err := apiGroupVersion.InstallREST(s.Handler.GoRestfulContainer)
@ -693,6 +776,7 @@ func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *A
s.AggregatedDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2beta1.APIVersionDiscovery{
Freshness: apidiscoveryv2beta1.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
@ -702,6 +786,7 @@ func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *A
s.AggregatedLegacyDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2beta1.APIVersionDiscovery{
Freshness: apidiscoveryv2beta1.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
@ -868,8 +953,10 @@ func NewDefaultAPIGroupInfo(group string, scheme *runtime.Scheme, parameterCodec
}
// getOpenAPIModels is a private method for getting the OpenAPI models
func (s *GenericAPIServer) getOpenAPIModels(apiPrefix string, apiGroupInfos ...*APIGroupInfo) (openapiproto.Models, error) {
if s.openAPIConfig == nil {
func (s *GenericAPIServer) getOpenAPIModels(apiPrefix string, apiGroupInfos ...*APIGroupInfo) (map[string]*spec.Schema, error) {
if s.openAPIV3Config == nil {
//!TODO: A future work should add a requirement that
// OpenAPIV3 config is required. May require some refactoring of tests.
return nil, nil
}
pathsToIgnore := openapiutil.NewTrie(s.openAPIConfig.IgnorePrefixes)
@ -883,14 +970,14 @@ func (s *GenericAPIServer) getOpenAPIModels(apiPrefix string, apiGroupInfos ...*
}
// Build the openapi definitions for those resources and convert it to proto models
openAPISpec, err := openapibuilder2.BuildOpenAPIDefinitionsForResources(s.openAPIConfig, resourceNames...)
openAPISpec, err := openapibuilder3.BuildOpenAPIDefinitionsForResources(s.openAPIV3Config, resourceNames...)
if err != nil {
return nil, err
}
for _, apiGroupInfo := range apiGroupInfos {
apiGroupInfo.StaticOpenAPISpec = openAPISpec
}
return utilopenapi.ToProtoModels(openAPISpec)
return openAPISpec, nil
}
// getResourceNamesForGroup is a private method for getting the canonical names for each resource to build in an api group


@ -146,6 +146,14 @@ type lifecycleSignals struct {
MuxAndDiscoveryComplete lifecycleSignal
}
// ShuttingDown returns the lifecycle signal that is signaled when
// the server is not accepting any new requests.
// This is the lifecycle event that is exported to the request handler
// logic to indicate that the server is shutting down.
func (s lifecycleSignals) ShuttingDown() <-chan struct{} {
return s.NotAcceptingNewRequest.Signaled()
}
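A hedged sketch of how a watch handler could consume this signal once WithWatchTerminationDuringShutdown has attached it to the request context; the accessor name ServerShutdownSignalFrom in k8s.io/apiserver/pkg/endpoints/request is an assumption here, not something introduced by this diff:

// inside a long-running handler, with ctx := req.Context()
if signal := apirequest.ServerShutdownSignalFrom(ctx); signal != nil { // accessor name assumed
	select {
	case <-signal.ShuttingDown():
		return // the server is draining watches; stop streaming and finish the request
	default:
	}
}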
// newLifecycleSignals returns an instance of lifecycleSignals interface to be used
// to coordinate lifecycle of the apiserver
func newLifecycleSignals() lifecycleSignals {


@ -76,6 +76,16 @@ func (s *RequestHeaderAuthenticationOptions) Validate() []error {
allErrors = append(allErrors, err)
}
if len(s.UsernameHeaders) > 0 && !caseInsensitiveHas(s.UsernameHeaders, "X-Remote-User") {
klog.Warningf("--requestheader-username-headers is set without specifying the standard X-Remote-User header - API aggregation will not work")
}
if len(s.GroupHeaders) > 0 && !caseInsensitiveHas(s.GroupHeaders, "X-Remote-Group") {
klog.Warningf("--requestheader-group-headers is set without specifying the standard X-Remote-Group header - API aggregation will not work")
}
if len(s.ExtraHeaderPrefixes) > 0 && !caseInsensitiveHas(s.ExtraHeaderPrefixes, "X-Remote-Extra-") {
klog.Warningf("--requestheader-extra-headers-prefix is set without specifying the standard X-Remote-Extra- header prefix - API aggregation will not work")
}
return allErrors
}
@ -89,6 +99,15 @@ func checkForWhiteSpaceOnly(flag string, headerNames ...string) error {
return nil
}
func caseInsensitiveHas(headers []string, header string) bool {
for _, h := range headers {
if strings.EqualFold(h, header) {
return true
}
}
return false
}
func (s *RequestHeaderAuthenticationOptions) AddFlags(fs *pflag.FlagSet) {
if s == nil {
return
@ -357,6 +376,7 @@ func (s *DelegatingAuthenticationOptions) ApplyTo(authenticationInfo *server.Aut
}
if requestHeaderConfig != nil {
cfg.RequestHeaderConfig = requestHeaderConfig
authenticationInfo.RequestHeaderConfig = requestHeaderConfig
if err = authenticationInfo.ApplyClientCert(cfg.RequestHeaderConfig.CAContentProvider, servingInfo); err != nil {
return fmt.Errorf("unable to load request-header-client-ca-file: %v", err)
}


@ -38,9 +38,8 @@ import (
// DelegatingAuthorizationOptions provides an easy way for composing API servers to delegate their authorization to
// the root kube API server.
// WARNING: never assume that every authenticated incoming request already does authorization.
//
// The aggregator in the kube API server does this today, but this behaviour is not
// guaranteed in the future.
// The aggregator in the kube API server does this today, but this behaviour is not
// guaranteed in the future.
type DelegatingAuthorizationOptions struct {
// RemoteKubeConfigFile is the file to use to connect to a "normal" kube API server which hosts the
// SubjectAccessReview.authorization.k8s.io endpoint for checking tokens.

View File

@ -36,6 +36,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
apiserverconfig "k8s.io/apiserver/pkg/apis/config"
apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1"
@ -46,9 +47,12 @@ import (
aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope"
envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
"k8s.io/apiserver/pkg/storage/value/encrypt/identity"
"k8s.io/apiserver/pkg/storage/value/encrypt/secretbox"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
kmsservice "k8s.io/kms/pkg/service"
)
const (
@ -57,11 +61,46 @@ const (
secretboxTransformerPrefixV1 = "k8s:enc:secretbox:v1:"
kmsTransformerPrefixV1 = "k8s:enc:kms:v1:"
kmsTransformerPrefixV2 = "k8s:enc:kms:v2:"
kmsPluginHealthzNegativeTTL = 3 * time.Second
kmsPluginHealthzPositiveTTL = 20 * time.Second
kmsAPIVersionV1 = "v1"
kmsAPIVersionV2 = "v2"
kmsReloadHealthCheckName = "kms-providers"
// these constants relate to how the KMS v2 plugin status poll logic
// and the DEK generation logic behave. In particular, the positive
// interval and max TTL are closely related as the difference between
// these values defines the worst case window in which the write DEK
// could expire due to the plugin going into an error state. The
// worst case window divided by the negative interval defines the
// minimum number of times the server will attempt to return to a
// healthy state before the DEK expires and writes begin to fail.
//
// For now, these values are kept small and hardcoded to support being
// able to perform a "passive" storage migration while tolerating some
// amount of plugin downtime.
//
// With the current approach, a user can update the key ID their plugin
// is using and then can simply schedule a migration for 3 + N + M minutes
// later where N is how long it takes their plugin to pick up new config
// and M is extra buffer to allow the API server to process the config.
// At that point, they are guaranteed to either migrate to the new key
// or get errors during the migration.
//
// If the API server coasted forever on the last DEK, they would need
// to actively check if it had observed the new key ID before starting
// a migration - otherwise it could keep using the old DEK and their
// storage migration would not do what they thought it did.
kmsv2PluginHealthzPositiveInterval = 1 * time.Minute
kmsv2PluginHealthzNegativeInterval = 10 * time.Second
kmsv2PluginWriteDEKMaxTTL = 3 * time.Minute
kmsPluginHealthzNegativeTTL = 3 * time.Second
kmsPluginHealthzPositiveTTL = 20 * time.Second
kmsAPIVersionV1 = "v1"
kmsAPIVersionV2 = "v2"
// this name is used for two different healthz endpoints:
// - when one or more KMS v2 plugins are in use and no KMS v1 plugins are in use
// in this case, all v2 plugins are probed via this single endpoint
// - when automatic reload of encryption config is enabled
// in this case, all KMS plugins are probed via this single endpoint
// the endpoint is present even if there are no KMS plugins configured (it is a no-op then)
kmsReloadHealthCheckName = "kms-providers"
)
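To make the "3 + N + M minutes" scheduling rule from the comment above concrete, a small within-package sketch; N (plugin config pickup time) and M (extra buffer) are operator-supplied estimates, not values defined by this change:

// earliestSafeMigration returns the earliest time a storage migration can be
// scheduled after the plugin's key ID changes: the write DEK max TTL plus the
// operator's estimates for plugin pickup time and extra processing buffer.
func earliestSafeMigration(keyIDChangedAt time.Time, pluginPickup, buffer time.Duration) time.Time {
	return keyIDChangedAt.Add(kmsv2PluginWriteDEKMaxTTL + pluginPickup + buffer)
}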
type kmsPluginHealthzResponse struct {
@ -78,9 +117,10 @@ type kmsPluginProbe struct {
}
type kmsv2PluginProbe struct {
state atomic.Pointer[envelopekmsv2.State]
name string
ttl time.Duration
service envelopekmsv2.Service
service kmsservice.Service
lastResponse *kmsPluginHealthzResponse
l *sync.Mutex
}
@ -133,15 +173,16 @@ type EncryptionConfiguration struct {
}
// LoadEncryptionConfig parses and validates the encryption config specified by filepath.
// It may launch multiple go routines whose lifecycle is controlled by stopCh.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
// If reload is true, or KMS v2 plugins are used with no KMS v1 plugins, the returned slice of health checkers will always be of length 1.
func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{}) (*EncryptionConfiguration, error) {
func LoadEncryptionConfig(ctx context.Context, filepath string, reload bool) (*EncryptionConfiguration, error) {
config, contentHash, err := loadConfig(filepath, reload)
if err != nil {
return nil, fmt.Errorf("error while parsing file: %w", err)
}
transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(config, stopCh)
transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(ctx, config)
if err != nil {
return nil, fmt.Errorf("error while building transformers: %w", err)
}
@ -150,7 +191,7 @@ func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{})
kmsHealthChecks = []healthz.HealthChecker{kmsHealthChecker(kmsHealthChecks)}
}
// KMSTimeout is the duration we will wait before closing old transformers.
// KMSCloseGracePeriod is the duration we will wait before closing old transformers.
// The way we calculate is as follows:
// 1. Sum all timeouts across all KMS plugins. (check kmsPrefixTransformer for differences between v1 and v2)
// 2. Multiply that by 2 (to allow for some buffer)
@ -160,12 +201,15 @@ func LoadEncryptionConfig(filepath string, reload bool, stopCh <-chan struct{})
HealthChecks: kmsHealthChecks,
EncryptionFileContentHash: contentHash,
KMSCloseGracePeriod: 2 * kmsUsed.kmsTimeoutSum,
}, err
}, nil
}
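A minimal sketch of the caller-side contract documented above (the helper name and file path are illustrative): the caller owns ctx and must cancel it on error so that any goroutines launched while building KMS transformers and probes are torn down. Note that the returned KMSCloseGracePeriod is twice the summed plugin timeouts, as described in the comment above.

// within this package
func loadOrCancel(ctx context.Context, cancel context.CancelFunc, path string) (*EncryptionConfiguration, error) {
	ec, err := LoadEncryptionConfig(ctx, path, false /* reload */)
	if err != nil {
		cancel() // stops goroutines launched for KMS probes and transformers
		return nil, err
	}
	return ec, nil
}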
func getTransformerOverridesAndKMSPluginHealthzCheckers(config *apiserverconfig.EncryptionConfiguration, stopCh <-chan struct{}) (map[schema.GroupResource]value.Transformer, []healthz.HealthChecker, *kmsState, error) {
// getTransformerOverridesAndKMSPluginHealthzCheckers creates the set of transformers and KMS healthz checks based on the given config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func getTransformerOverridesAndKMSPluginHealthzCheckers(ctx context.Context, config *apiserverconfig.EncryptionConfiguration) (map[schema.GroupResource]value.Transformer, []healthz.HealthChecker, *kmsState, error) {
var kmsHealthChecks []healthz.HealthChecker
transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(config, stopCh)
transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(ctx, config)
if err != nil {
return nil, nil, nil, err
}
@ -181,7 +225,10 @@ type healthChecker interface {
toHealthzCheck(idx int) healthz.HealthChecker
}
func getTransformerOverridesAndKMSPluginProbes(config *apiserverconfig.EncryptionConfiguration, stopCh <-chan struct{}) (map[schema.GroupResource]value.Transformer, []healthChecker, *kmsState, error) {
// getTransformerOverridesAndKMSPluginProbes creates the set of transformers and KMS probes based on the given config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func getTransformerOverridesAndKMSPluginProbes(ctx context.Context, config *apiserverconfig.EncryptionConfiguration) (map[schema.GroupResource]value.Transformer, []healthChecker, *kmsState, error) {
resourceToPrefixTransformer := map[schema.GroupResource][]value.PrefixTransformer{}
var probes []healthChecker
var kmsUsed kmsState
@ -190,21 +237,32 @@ func getTransformerOverridesAndKMSPluginProbes(config *apiserverconfig.Encryptio
for _, resourceConfig := range config.Resources {
resourceConfig := resourceConfig
transformers, p, used, err := prefixTransformersAndProbes(resourceConfig, stopCh)
transformers, p, used, err := prefixTransformersAndProbes(ctx, resourceConfig)
if err != nil {
return nil, nil, nil, err
}
kmsUsed.v1Used = kmsUsed.v1Used || used.v1Used
kmsUsed.v2Used = kmsUsed.v2Used || used.v2Used
kmsUsed.kmsTimeoutSum += used.kmsTimeoutSum
kmsUsed.accumulate(used)
// For each resource, create a list of providers to use
for _, resource := range resourceConfig.Resources {
resource := resource
gr := schema.ParseGroupResource(resource)
resourceToPrefixTransformer[gr] = append(
resourceToPrefixTransformer[gr], transformers...)
// check if resource is masked by *.group rule
anyResourceInGroup := schema.GroupResource{Group: gr.Group, Resource: "*"}
if _, masked := resourceToPrefixTransformer[anyResourceInGroup]; masked {
// an earlier rule already configured a transformer for *.group, masking this rule
// return error since this is not allowed
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyResourceInGroup))
}
if _, masked := resourceToPrefixTransformer[anyGroupAnyResource]; masked {
// an earlier rule already configured a transformer for *.*, masking this rule
// return error since this is not allowed
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyGroupAnyResource))
}
resourceToPrefixTransformer[gr] = append(resourceToPrefixTransformer[gr], transformers...)
}
probes = append(probes, p...)
@ -252,7 +310,7 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
h.l.Lock()
defer h.l.Unlock()
if (time.Since(h.lastResponse.received)) < h.ttl {
if time.Since(h.lastResponse.received) < h.ttl {
return h.lastResponse.err
}
@ -263,7 +321,7 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
return fmt.Errorf("failed to perform status section of the healthz check for KMS Provider %s, error: %w", h.name, err)
}
if err := isKMSv2ProviderHealthy(h.name, p); err != nil {
if err := h.isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx, p); err != nil {
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
h.ttl = kmsPluginHealthzNegativeTTL
return err
@ -274,8 +332,93 @@ func (h *kmsv2PluginProbe) check(ctx context.Context) error {
return nil
}
// isKMSv2ProviderHealthy checks if the KMSv2-Plugin is healthy.
func isKMSv2ProviderHealthy(name string, response *envelopekmsv2.StatusResponse) error {
// rotateDEKOnKeyIDChange tries to rotate to a new DEK if the key ID returned by Status does not match the
// current state. If a successful rotation is performed, the new DEK and keyID overwrite the existing state.
// On any failure during rotation (including mismatch between status and encrypt calls), the current state is
// preserved and will remain valid to use for encryption until its expiration (the system attempts to coast).
// If the key ID returned by Status matches the current state, the expiration of the current state is extended
// and no rotation is performed.
func (h *kmsv2PluginProbe) rotateDEKOnKeyIDChange(ctx context.Context, statusKeyID, uid string) error {
// we do not check ValidateEncryptCapability here because it is fine to re-use an old key
// that was marked as expired during an unhealthy period. As long as the key ID matches
// what we expect then there is no need to rotate here.
state, errState := h.getCurrentState()
// allow reads indefinitely in all cases
// allow writes indefinitely as long as there is no error
// allow writes for only up to kmsv2PluginWriteDEKMaxTTL from now when there are errors
// we start the timer before we make the network call because kmsv2PluginWriteDEKMaxTTL is meant to be the upper bound
expirationTimestamp := envelopekmsv2.NowFunc().Add(kmsv2PluginWriteDEKMaxTTL)
// state is valid and status keyID is unchanged from when we generated this DEK so there is no need to rotate it
// just move the expiration of the current state forward by the reuse interval
if errState == nil && state.KeyID == statusKeyID {
state.ExpirationTimestamp = expirationTimestamp
h.state.Store(&state)
return nil
}
transformer, resp, cacheKey, errGen := envelopekmsv2.GenerateTransformer(ctx, uid, h.service)
if resp == nil {
resp = &kmsservice.EncryptResponse{} // avoid nil panics
}
// happy path, should be the common case
// TODO maybe add success metrics?
if errGen == nil && resp.KeyID == statusKeyID {
h.state.Store(&envelopekmsv2.State{
Transformer: transformer,
EncryptedDEK: resp.Ciphertext,
KeyID: resp.KeyID,
Annotations: resp.Annotations,
UID: uid,
ExpirationTimestamp: expirationTimestamp,
CacheKey: cacheKey,
})
klog.V(6).InfoS("successfully rotated DEK",
"uid", uid,
"newKeyID", resp.KeyID,
"oldKeyID", state.KeyID,
"expirationTimestamp", expirationTimestamp.Format(time.RFC3339),
)
return nil
}
return fmt.Errorf("failed to rotate DEK uid=%q, errState=%v, errGen=%v, statusKeyID=%q, encryptKeyID=%q, stateKeyID=%q, expirationTimestamp=%s",
uid, errState, errGen, statusKeyID, resp.KeyID, state.KeyID, state.ExpirationTimestamp.Format(time.RFC3339))
}
// getCurrentState returns the latest state from the last status and encrypt calls.
// If the returned error is nil, the state is considered valid indefinitely for read requests.
// For write requests, the caller must also check that state.ValidateEncryptCapability does not error.
func (h *kmsv2PluginProbe) getCurrentState() (envelopekmsv2.State, error) {
state := *h.state.Load()
if state.Transformer == nil {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected nil transformer")
}
if len(state.EncryptedDEK) == 0 {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty EncryptedDEK")
}
if len(state.KeyID) == 0 {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty keyID")
}
if state.ExpirationTimestamp.IsZero() {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected zero expirationTimestamp")
}
if len(state.CacheKey) == 0 {
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty cacheKey")
}
return state, nil
}
func (h *kmsv2PluginProbe) isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx context.Context, response *kmsservice.StatusResponse) error {
var errs []error
if response.Healthz != "ok" {
errs = append(errs, fmt.Errorf("got unexpected healthz status: %s", response.Healthz))
@ -283,12 +426,18 @@ func isKMSv2ProviderHealthy(name string, response *envelopekmsv2.StatusResponse)
if response.Version != envelopekmsv2.KMSAPIVersion {
errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersion, response.Version))
}
if len(response.KeyID) == 0 {
errs = append(errs, fmt.Errorf("expected KMSv2 KeyID to be set, got %s", response.KeyID))
if errCode, err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil {
metrics.RecordInvalidKeyIDFromStatus(h.name, string(errCode))
errs = append(errs, fmt.Errorf("got invalid KMSv2 KeyID %q: %w", response.KeyID, err))
} else {
metrics.RecordKeyIDFromStatus(h.name, response.KeyID)
// unconditionally append as we filter out nil errors below
errs = append(errs, h.rotateDEKOnKeyIDChange(ctx, response.KeyID, string(uuid.NewUUID())))
}
if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil {
return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", name, err)
return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", h.name, err)
}
return nil
}
@ -316,7 +465,7 @@ func loadConfig(filepath string, reload bool) (*apiserverconfig.EncryptionConfig
configObj, gvk, err := codecs.UniversalDecoder().Decode(data, nil, nil)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("error decoding encryption provider configuration file %q: %w", filepath, err)
}
config, ok := configObj.(*apiserverconfig.EncryptionConfiguration)
if !ok {
@ -326,7 +475,10 @@ func loadConfig(filepath string, reload bool) (*apiserverconfig.EncryptionConfig
return config, computeEncryptionConfigHash(data), validation.ValidateEncryptionConfiguration(config, reload).ToAggregate()
}
func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, stopCh <-chan struct{}) ([]value.PrefixTransformer, []healthChecker, *kmsState, error) {
// prefixTransformersAndProbes creates the set of transformers and KMS probes based on the given resource config.
// It may launch multiple go routines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
func prefixTransformersAndProbes(ctx context.Context, config apiserverconfig.ResourceConfiguration) ([]value.PrefixTransformer, []healthChecker, *kmsState, error) {
var transformers []value.PrefixTransformer
var probes []healthChecker
var kmsUsed kmsState
@ -345,20 +497,19 @@ func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, s
transformer, transformerErr = aesPrefixTransformer(provider.AESGCM, aestransformer.NewGCMTransformer, aesGCMTransformerPrefixV1)
case provider.AESCBC != nil:
transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, aestransformer.NewCBCTransformer, aesCBCTransformerPrefixV1)
cbcTransformer := func(block cipher.Block) (value.Transformer, error) {
return aestransformer.NewCBCTransformer(block), nil
}
transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, cbcTransformer, aesCBCTransformerPrefixV1)
case provider.Secretbox != nil:
transformer, transformerErr = secretboxPrefixTransformer(provider.Secretbox)
case provider.KMS != nil:
transformer, probe, used, transformerErr = kmsPrefixTransformer(provider.KMS, stopCh)
transformer, probe, used, transformerErr = kmsPrefixTransformer(ctx, provider.KMS)
if transformerErr == nil {
probes = append(probes, probe)
kmsUsed.v1Used = kmsUsed.v1Used || used.v1Used
kmsUsed.v2Used = kmsUsed.v2Used || used.v2Used
// calculate the maximum timeout for all KMS providers
kmsUsed.kmsTimeoutSum += used.kmsTimeoutSum
kmsUsed.accumulate(used)
}
case provider.Identity != nil:
@ -381,7 +532,7 @@ func prefixTransformersAndProbes(config apiserverconfig.ResourceConfiguration, s
return transformers, probes, &kmsUsed, nil
}
type blockTransformerFunc func(cipher.Block) value.Transformer
type blockTransformerFunc func(cipher.Block) (value.Transformer, error)
func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTransformerFunc, prefix string) (value.PrefixTransformer, error) {
var result value.PrefixTransformer
@ -405,17 +556,21 @@ func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTran
keyData := keyData
key, err := base64.StdEncoding.DecodeString(keyData.Secret)
if err != nil {
return result, fmt.Errorf("could not obtain secret for named key %s: %s", keyData.Name, err)
return result, fmt.Errorf("could not obtain secret for named key %s: %w", keyData.Name, err)
}
block, err := aes.NewCipher(key)
if err != nil {
return result, fmt.Errorf("error while creating cipher for named key %s: %s", keyData.Name, err)
return result, fmt.Errorf("error while creating cipher for named key %s: %w", keyData.Name, err)
}
transformer, err := fn(block)
if err != nil {
return result, fmt.Errorf("error while creating transformer for named key %s: %w", keyData.Name, err)
}
// Create a new PrefixTransformer for this key
keyTransformers = append(keyTransformers,
value.PrefixTransformer{
Transformer: fn(block),
Transformer: transformer,
Prefix: []byte(keyData.Name + ":"),
})
}
@ -497,10 +652,20 @@ type kmsState struct {
kmsTimeoutSum time.Duration
}
func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-chan struct{}) (value.PrefixTransformer, healthChecker, *kmsState, error) {
// we ignore the cancel func because this context should only be canceled when stopCh is closed
ctx, _ := wait.ContextForChannel(stopCh)
// accumulate computes the KMS state by:
// - determining which KMS plugin versions are in use
// - calculating kmsTimeoutSum which is used as transformTracker.kmsCloseGracePeriod
// DynamicTransformers.Set waits for this period before closing old transformers after a config reload
func (s *kmsState) accumulate(other *kmsState) {
s.v1Used = s.v1Used || other.v1Used
s.v2Used = s.v2Used || other.v2Used
s.kmsTimeoutSum += other.kmsTimeoutSum
}
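As a rough illustration of how accumulate folds per-provider state together (the provider timeouts are made-up values; the summed duration is what later becomes the KMS close grace period), using a local copy of the unexported type:
package main
import (
	"fmt"
	"time"
)
// kmsState is copied here for illustration only; upstream it is unexported.
type kmsState struct {
	v1Used, v2Used bool
	kmsTimeoutSum  time.Duration
}
func (s *kmsState) accumulate(other *kmsState) {
	s.v1Used = s.v1Used || other.v1Used
	s.v2Used = s.v2Used || other.v2Used
	s.kmsTimeoutSum += other.kmsTimeoutSum
}
func main() {
	var total kmsState
	// two hypothetical providers: a KMSv1 plugin with a 3s timeout and a KMSv2 plugin with 5s
	total.accumulate(&kmsState{v1Used: true, kmsTimeoutSum: 3 * time.Second})
	total.accumulate(&kmsState{v2Used: true, kmsTimeoutSum: 5 * time.Second})
	fmt.Println(total.v1Used, total.v2Used, total.kmsTimeoutSum) // true true 8s
}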
// kmsPrefixTransformer creates a KMS transformer and probe based on the given KMS config.
// It may launch multiple goroutines whose lifecycle is controlled by ctx.
// In case of an error, the caller is responsible for canceling ctx to clean up any goroutines that may have been launched.
func kmsPrefixTransformer(ctx context.Context, config *apiserverconfig.KMSConfiguration) (value.PrefixTransformer, healthChecker, *kmsState, error) {
kmsName := config.Name
switch config.APIVersion {
case kmsAPIVersionV1:
@ -530,7 +695,7 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2 plugin %q, KMSv2 feature is not enabled", kmsName)
}
envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Timeout.Duration)
envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Name, config.Timeout.Duration)
if err != nil {
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2-Plugin's probe %q, error: %w", kmsName, err)
}
@ -542,10 +707,49 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
l: &sync.Mutex{},
lastResponse: &kmsPluginHealthzResponse{},
}
// initialize state so that Load always works
probe.state.Store(&envelopekmsv2.State{})
runProbeCheckAndLog := func(ctx context.Context) error {
if err := probe.check(ctx); err != nil {
klog.VDepth(1, 2).ErrorS(err, "kms plugin failed health check probe", "name", kmsName)
return err
}
return nil
}
// on the happy path where the plugin is healthy and available on server start,
// prime keyID and DEK by running the check inline once (this also prevents unit tests from flaking)
// ignore the error here since we want to support the plugin starting up async with the API server
_ = runProbeCheckAndLog(ctx)
// make sure that the plugin's key ID is reasonably up-to-date
// also, make sure that our DEK is up to date with said key ID (if it expires the server will fail all writes)
// if this background loop ever stops running, the server will become non-functional after kmsv2PluginWriteDEKMaxTTL
go wait.PollUntilWithContext(
ctx,
kmsv2PluginHealthzPositiveInterval,
func(ctx context.Context) (bool, error) {
if err := runProbeCheckAndLog(ctx); err == nil {
return false, nil
}
// TODO add integration test for quicker error poll on failure
// if we fail, block the outer polling and start a new quicker poll inline
// this limits the chance that our DEK expires during a transient failure
_ = wait.PollUntilWithContext(
ctx,
kmsv2PluginHealthzNegativeInterval,
func(ctx context.Context) (bool, error) {
return runProbeCheckAndLog(ctx) == nil, nil
},
)
return false, nil
})
// using AES-GCM by default for encrypting data with KMSv2
transformer := value.PrefixTransformer{
Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, int(*config.CacheSize), aestransformer.NewGCMTransformer),
Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, kmsName, probe.getCurrentState),
Prefix: []byte(kmsTransformerPrefixV2 + kmsName + ":"),
}
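The probe loop above runs on a slow "positive" cadence and, while the plugin is failing, drops into a faster inline loop so the DEK is refreshed as soon as the plugin recovers. The same shape sketched in isolation, with made-up intervals and a fake probe; only the wait helper already used above is assumed:
package main
import (
	"context"
	"errors"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)
func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	var calls int
	probe := func(ctx context.Context) error { // fake health probe: fails twice, then recovers
		calls++
		if calls < 3 {
			return errors.New("plugin not ready")
		}
		return nil
	}

	// outer loop: never signals done, so it keeps polling at the slow interval until ctx ends
	_ = wait.PollUntilWithContext(ctx, 2*time.Second, func(ctx context.Context) (bool, error) {
		if err := probe(ctx); err == nil {
			return false, nil // healthy: stay on the slow cadence
		}
		// unhealthy: block here and retry quickly until the probe passes again
		_ = wait.PollUntilWithContext(ctx, 200*time.Millisecond, func(ctx context.Context) (bool, error) {
			return probe(ctx) == nil, nil
		})
		return false, nil
	})
	fmt.Println("probe calls:", calls)
}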
@ -560,12 +764,17 @@ func kmsPrefixTransformer(config *apiserverconfig.KMSConfiguration, stopCh <-cha
}
func envelopePrefixTransformer(config *apiserverconfig.KMSConfiguration, envelopeService envelope.Service, prefix string) value.PrefixTransformer {
baseTransformerFunc := func(block cipher.Block) value.Transformer {
baseTransformerFunc := func(block cipher.Block) (value.Transformer, error) {
gcm, err := aestransformer.NewGCMTransformer(block)
if err != nil {
return nil, err
}
// v1.24: write using AES-CBC only but support reads via AES-CBC and AES-GCM (so we can move to AES-GCM)
// v1.25: write using AES-GCM only but support reads via AES-GCM and fallback to AES-CBC for backwards compatibility
// TODO(aramase): Post v1.25: We cannot drop CBC read support until we automate storage migration.
// We could have a release note that hard requires users to perform storage migration.
return unionTransformers{aestransformer.NewGCMTransformer(block), aestransformer.NewCBCTransformer(block)}
return unionTransformers{gcm, aestransformer.NewCBCTransformer(block)}, nil
}
return value.PrefixTransformer{
@ -606,6 +815,7 @@ func computeEncryptionConfigHash(data []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(data))
}
var _ ResourceTransformers = &DynamicTransformers{}
var _ healthz.HealthChecker = &DynamicTransformers{}
// DynamicTransformers holds transformers that may be dynamically updated via a single external actor, likely a controller.
@ -704,27 +914,49 @@ func (r *resourceTransformer) TransformToStorage(ctx context.Context, data []byt
}
func (r *resourceTransformer) transformer() value.Transformer {
transformer := r.transformTracker.Load().(*transformTracker).transformerOverrides[r.resource]
if transformer == nil {
return identity.NewEncryptCheckTransformer()
}
return transformer
return transformerFromOverrides(r.transformTracker.Load().(*transformTracker).transformerOverrides, r.resource)
}
type ResourceTransformers interface {
TransformerForResource(resource schema.GroupResource) value.Transformer
}
var _ ResourceTransformers = &DynamicTransformers{}
var _ ResourceTransformers = &StaticTransformers{}
type StaticTransformers map[schema.GroupResource]value.Transformer
// TransformerForResource returns the transformer for the given resource, resolving wildcard overrides and falling back to the identity transformer.
func (s StaticTransformers) TransformerForResource(resource schema.GroupResource) value.Transformer {
transformer := s[resource]
if transformer == nil {
return identity.NewEncryptCheckTransformer()
}
return transformer
return transformerFromOverrides(s, resource)
}
var anyGroupAnyResource = schema.GroupResource{
Group: "*",
Resource: "*",
}
func transformerFromOverrides(transformerOverrides map[schema.GroupResource]value.Transformer, resource schema.GroupResource) value.Transformer {
if transformer := transformerOverrides[resource]; transformer != nil {
return transformer
}
if transformer := transformerOverrides[schema.GroupResource{
Group: resource.Group,
Resource: "*",
}]; transformer != nil {
return transformer
}
if transformer := transformerOverrides[anyGroupAnyResource]; transformer != nil {
return transformer
}
return identity.NewEncryptCheckTransformer()
}
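transformerFromOverrides itself is unexported, but its lookup order (exact resource, then a per-group wildcard, then *.*, then the identity fallback) is easy to demonstrate against a plain map. A sketch with hypothetical override names standing in for transformers:
package main
import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime/schema"
)
// pick mirrors the precedence implemented above; string values stand in for transformers.
func pick(overrides map[schema.GroupResource]string, resource schema.GroupResource) string {
	if v, ok := overrides[resource]; ok {
		return v // exact match
	}
	if v, ok := overrides[schema.GroupResource{Group: resource.Group, Resource: "*"}]; ok {
		return v // all resources in the same group
	}
	if v, ok := overrides[schema.GroupResource{Group: "*", Resource: "*"}]; ok {
		return v // catch-all
	}
	return "identity"
}
func main() {
	overrides := map[schema.GroupResource]string{
		{Group: "", Resource: "secrets"}: "kms:secrets-key",
		{Group: "", Resource: "*"}:       "kms:core-wildcard",
		{Group: "*", Resource: "*"}:      "kms:catch-all",
	}
	fmt.Println(pick(overrides, schema.GroupResource{Group: "", Resource: "secrets"}))         // kms:secrets-key
	fmt.Println(pick(overrides, schema.GroupResource{Group: "", Resource: "configmaps"}))      // kms:core-wildcard
	fmt.Println(pick(overrides, schema.GroupResource{Group: "apps", Resource: "deployments"})) // kms:catch-all
}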
func grYAMLString(gr schema.GroupResource) string {
if gr.Group == "" && gr.Resource == "*" {
return "*."
}
return gr.String()
}
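The special case above only covers the all-resources wildcard in the core group; every other pair keeps its normal GroupResource string form. A quick check (the function is copied here only because it is unexported upstream):
package main
import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime/schema"
)
// copy of grYAMLString, for illustration only
func grYAMLString(gr schema.GroupResource) string {
	if gr.Group == "" && gr.Resource == "*" {
		return "*."
	}
	return gr.String()
}
func main() {
	fmt.Println(grYAMLString(schema.GroupResource{Group: "", Resource: "*"}))               // *.
	fmt.Println(grYAMLString(schema.GroupResource{Group: "*", Resource: "*"}))              // *.*
	fmt.Println(grYAMLString(schema.GroupResource{Group: "", Resource: "secrets"}))         // secrets
	fmt.Println(grYAMLString(schema.GroupResource{Group: "apps", Resource: "deployments"})) // deployments.apps
}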

View File

@ -49,27 +49,22 @@ type DynamicKMSEncryptionConfigContent struct {
// dynamicTransformers updates the transformers when encryption config file changes.
dynamicTransformers *encryptionconfig.DynamicTransformers
// stopCh used here is a lifecycle signal of genericapiserver already drained while shutting down.
stopCh <-chan struct{}
}
// NewDynamicKMSEncryptionConfiguration returns controller that dynamically reacts to changes in encryption config file.
func NewDynamicKMSEncryptionConfiguration(
// NewDynamicEncryptionConfiguration returns a controller that dynamically reacts to changes in the encryption config file.
func NewDynamicEncryptionConfiguration(
name, filePath string,
dynamicTransformers *encryptionconfig.DynamicTransformers,
configContentHash string,
stopCh <-chan struct{},
) *DynamicKMSEncryptionConfigContent {
encryptionConfig := &DynamicKMSEncryptionConfigContent{
name: name,
filePath: filePath,
lastLoadedEncryptionConfigHash: configContentHash,
dynamicTransformers: dynamicTransformers,
stopCh: stopCh,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), fmt.Sprintf("%s-hot-reload", name)),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), name),
}
encryptionConfig.queue.Add(workqueueKey)
encryptionConfig.queue.Add(workqueueKey) // to avoid missing any file changes that occur in between the initial load and Run
return encryptionConfig
}
@ -83,21 +78,21 @@ func (d *DynamicKMSEncryptionConfigContent) Run(ctx context.Context) {
defer klog.InfoS("Shutting down controller", "name", d.name)
// start worker for processing content
go wait.Until(d.runWorker, time.Second, ctx.Done())
go wait.UntilWithContext(ctx, d.runWorker, time.Second)
// start the loop that watches the encryption config file until stopCh is closed.
go wait.Until(func() {
if err := d.watchEncryptionConfigFile(ctx.Done()); err != nil {
go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := d.watchEncryptionConfigFile(ctx); err != nil {
// if there is an error while setting up or handling the watches, this will ensure that we will process the config file.
defer d.queue.Add(workqueueKey)
klog.ErrorS(err, "Failed to watch encryption config file, will retry later")
}
}, time.Second, ctx.Done())
}, time.Second)
<-ctx.Done()
}
func (d *DynamicKMSEncryptionConfigContent) watchEncryptionConfigFile(stopCh <-chan struct{}) error {
func (d *DynamicKMSEncryptionConfigContent) watchEncryptionConfigFile(ctx context.Context) error {
watcher, err := fsnotify.NewWatcher()
if err != nil {
return fmt.Errorf("error creating fsnotify watcher: %w", err)
@ -116,7 +111,7 @@ func (d *DynamicKMSEncryptionConfigContent) watchEncryptionConfigFile(stopCh <-c
}
case err := <-watcher.Errors:
return fmt.Errorf("received fsnotify error: %w", err)
case <-stopCh:
case <-ctx.Done():
return nil
}
}
@ -142,13 +137,13 @@ func (d *DynamicKMSEncryptionConfigContent) handleWatchEvent(event fsnotify.Even
}
// runWorker to process file content
func (d *DynamicKMSEncryptionConfigContent) runWorker() {
for d.processNextWorkItem() {
func (d *DynamicKMSEncryptionConfigContent) runWorker(ctx context.Context) {
for d.processNextWorkItem(ctx) {
}
}
// processNextWorkItem processes file content when there is a message in the queue.
func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem(serverCtx context.Context) bool {
// key here is a dummy item in the queue to trigger file content processing.
key, quit := d.queue.Get()
if quit {
@ -163,12 +158,15 @@ func (d *DynamicKMSEncryptionConfigContent) processNextWorkItem() bool {
configChanged bool
)
// get context to close the new transformers.
ctx, closeTransformers := wait.ContextForChannel(d.stopCh)
// get context to close the new transformers (on error cases and on the next reload)
// serverCtx is attached to the API server's lifecycle so we will always close transformers on shut down
ctx, closeTransformers := context.WithCancel(serverCtx)
defer func() {
// TODO: increment success metric when updatedEffectiveConfig=true
// TODO can work queue metrics help here?
if !updatedEffectiveConfig {
// avoid leaking if we're not using the newly constructed transformers (due to an error or them not being changed)
closeTransformers()
@ -222,7 +220,7 @@ func (d *DynamicKMSEncryptionConfigContent) processEncryptionConfig(ctx context.
err error,
) {
// this code path will only execute if reload=true. So passing true explicitly.
encryptionConfiguration, err = encryptionconfig.LoadEncryptionConfig(d.filePath, true, ctx.Done())
encryptionConfiguration, err = encryptionconfig.LoadEncryptionConfig(ctx, d.filePath, true)
if err != nil {
return nil, false, err
}
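For reference, a standalone caller of the context-aware loader used above might look like the sketch below; the file path is a placeholder and only fields referenced elsewhere in this diff (Transformers, HealthChecks, EncryptionFileContentHash) are assumed:
package main
import (
	"context"
	"fmt"

	"k8s.io/apiserver/pkg/server/options/encryptionconfig"
)
func main() {
	// ctx bounds the lifetime of any KMS connections/probes the loader starts
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// reload=false: a one-shot load without hot-reload semantics (path is hypothetical)
	cfg, err := encryptionconfig.LoadEncryptionConfig(ctx, "/etc/kubernetes/encryption.yaml", false)
	if err != nil {
		fmt.Println("load failed:", err)
		return
	}
	fmt.Println("resources with transformers:", len(cfg.Transformers))
	fmt.Println("health checks:", len(cfg.HealthChecks))
	fmt.Println("content hash:", cfg.EncryptionFileContentHash)
}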
@ -247,7 +245,12 @@ func (d *DynamicKMSEncryptionConfigContent) validateNewTransformersHealth(
kmsPluginCloseGracePeriod = 10 * time.Second
}
pollErr := wait.PollImmediate(100*time.Millisecond, kmsPluginCloseGracePeriod, func() (bool, error) {
// really make sure that the immediate check does not hang
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, kmsPluginCloseGracePeriod)
defer cancel()
pollErr := wait.PollImmediateWithContext(ctx, 100*time.Millisecond, kmsPluginCloseGracePeriod, func(ctx context.Context) (bool, error) {
// create a fake http get request to health check endpoint
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/healthz/%s", kmsPluginHealthzCheck.Name()), nil)
if err != nil {

View File

@ -17,6 +17,7 @@ limitations under the License.
package options
import (
"context"
"fmt"
"net/http"
"strconv"
@ -33,7 +34,7 @@ import (
"k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/server/options/encryptionconfig"
kmsconfigcontroller "k8s.io/apiserver/pkg/server/options/encryptionconfig/controller"
encryptionconfigcontroller "k8s.io/apiserver/pkg/server/options/encryptionconfig/controller"
serverstorage "k8s.io/apiserver/pkg/server/storage"
"k8s.io/apiserver/pkg/storage/storagebackend"
storagefactory "k8s.io/apiserver/pkg/storage/storagebackend/factory"
@ -228,10 +229,11 @@ func (s *EtcdOptions) Complete(
}
if len(s.EncryptionProviderConfigFilepath) != 0 {
ctxTransformers, closeTransformers := wait.ContextForChannel(stopCh)
ctxServer, _ := wait.ContextForChannel(stopCh) // explicitly ignore cancel here because we do not own the server's lifecycle
ctxServer := wait.ContextForChannel(stopCh)
// nolint:govet // The only code path where closeTransformers does not get called is when it gets stored in dynamicTransformers.
ctxTransformers, closeTransformers := context.WithCancel(ctxServer)
encryptionConfiguration, err := encryptionconfig.LoadEncryptionConfig(s.EncryptionProviderConfigFilepath, s.EncryptionProviderConfigAutomaticReload, ctxTransformers.Done())
encryptionConfiguration, err := encryptionconfig.LoadEncryptionConfig(ctxTransformers, s.EncryptionProviderConfigFilepath, s.EncryptionProviderConfigAutomaticReload)
if err != nil {
// in case of error, we want to close partially initialized (if any) transformers
closeTransformers()
@ -247,25 +249,22 @@ func (s *EtcdOptions) Complete(
return fmt.Errorf("failed to start kms encryption config hot reload controller. only 1 health check should be available when reload is enabled")
}
// Here the dynamic transformers take ownership of the transformers and their cancellation.
dynamicTransformers := encryptionconfig.NewDynamicTransformers(encryptionConfiguration.Transformers, encryptionConfiguration.HealthChecks[0], closeTransformers, encryptionConfiguration.KMSCloseGracePeriod)
s.resourceTransformers = dynamicTransformers
s.kmsPluginHealthzChecks = []healthz.HealthChecker{dynamicTransformers}
// add post start hook to start hot reload controller
// adding this hook here will ensure that it gets configured exactly once
err = addPostStartHook(
"start-encryption-provider-config-automatic-reload",
func(hookContext server.PostStartHookContext) error {
kmsConfigController := kmsconfigcontroller.NewDynamicKMSEncryptionConfiguration(
"kms-encryption-config",
func(_ server.PostStartHookContext) error {
dynamicEncryptionConfigController := encryptionconfigcontroller.NewDynamicEncryptionConfiguration(
"encryption-provider-config-automatic-reload-controller",
s.EncryptionProviderConfigFilepath,
dynamicTransformers,
encryptionConfiguration.EncryptionFileContentHash,
ctxServer.Done(),
)
go kmsConfigController.Run(ctxServer)
go dynamicEncryptionConfigController.Run(ctxServer)
return nil
},
@ -275,6 +274,9 @@ func (s *EtcdOptions) Complete(
closeTransformers()
return fmt.Errorf("failed to add post start hook for kms encryption config hot reload controller: %w", err)
}
s.resourceTransformers = dynamicTransformers
s.kmsPluginHealthzChecks = []healthz.HealthChecker{dynamicTransformers}
} else {
s.resourceTransformers = encryptionconfig.StaticTransformers(encryptionConfiguration.Transformers)
s.kmsPluginHealthzChecks = encryptionConfiguration.HealthChecks
@ -285,6 +287,7 @@ func (s *EtcdOptions) Complete(
s.complete = true
// nolint:govet // The only code path where closeTransformers does not get called is when it gets stored in dynamicTransformers.
return nil
}
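The hunk above also reflects the 0.27 change to wait.ContextForChannel, which now returns only a context (it is canceled solely by closing the channel), with a separately cancelable context layered on top for the transformers. The layering in isolation:
package main
import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/util/wait"
)
func main() {
	stopCh := make(chan struct{})

	// server-scoped context: done only when stopCh is closed
	ctxServer := wait.ContextForChannel(stopCh)

	// transformer-scoped context: can be canceled early (e.g. on setup errors) and
	// is also canceled transitively when the server context ends
	ctxTransformers, closeTransformers := context.WithCancel(ctxServer)

	closeTransformers()
	fmt.Println("transformers ctx done:", ctxTransformers.Err() != nil) // true
	fmt.Println("server ctx done:", ctxServer.Err() != nil)             // false, stopCh is still open

	close(stopCh)
}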

View File

@ -25,6 +25,7 @@ import (
type FeatureOptions struct {
EnableProfiling bool
DebugSocketPath string
EnableContentionProfiling bool
}
@ -33,6 +34,7 @@ func NewFeatureOptions() *FeatureOptions {
return &FeatureOptions{
EnableProfiling: defaults.EnableProfiling,
DebugSocketPath: defaults.DebugSocketPath,
EnableContentionProfiling: defaults.EnableContentionProfiling,
}
}
@ -45,7 +47,9 @@ func (o *FeatureOptions) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&o.EnableProfiling, "profiling", o.EnableProfiling,
"Enable profiling via web interface host:port/debug/pprof/")
fs.BoolVar(&o.EnableContentionProfiling, "contention-profiling", o.EnableContentionProfiling,
"Enable lock contention profiling, if profiling is enabled")
"Enable block profiling, if profiling is enabled")
fs.StringVar(&o.DebugSocketPath, "debug-socket-path", o.DebugSocketPath,
"Use an unprotected (no authn/authz) unix-domain socket for profiling with the given path")
}
func (o *FeatureOptions) ApplyTo(c *server.Config) error {
@ -54,6 +58,7 @@ func (o *FeatureOptions) ApplyTo(c *server.Config) error {
}
c.EnableProfiling = o.EnableProfiling
c.DebugSocketPath = o.DebugSocketPath
c.EnableContentionProfiling = o.EnableContentionProfiling
return nil

View File

@ -19,10 +19,10 @@ package options
import (
"fmt"
"net"
"regexp"
"strings"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apiserver/pkg/server"
@ -31,6 +31,16 @@ import (
"github.com/spf13/pflag"
)
const (
corsAllowedOriginsHelpText = "List of allowed origins for CORS, comma separated. " +
"An allowed origin can be a regular expression to support subdomain matching. " +
"If this list is empty CORS will not be enabled. " +
"Please ensure each expression matches the entire hostname by anchoring " +
"to the start with '^' or including the '//' prefix, and by anchoring to the " +
"end with '$' or including the ':' port separator suffix. " +
"Examples of valid expressions are '//example\\.com(:|$)' and '^https://example\\.com(:|$)'"
)
// ServerRunOptions contains the options while running a generic api server.
type ServerRunOptions struct {
AdvertiseAddress net.IP
@ -63,21 +73,39 @@ type ServerRunOptions struct {
// If enabled, after ShutdownDelayDuration elapses, any incoming request is
// rejected with a 429 status code and a 'Retry-After' response.
ShutdownSendRetryAfter bool
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain, it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain, this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
func NewServerRunOptions() *ServerRunOptions {
defaults := server.NewConfig(serializer.CodecFactory{})
return &ServerRunOptions{
MaxRequestsInFlight: defaults.MaxRequestsInFlight,
MaxMutatingRequestsInFlight: defaults.MaxMutatingRequestsInFlight,
RequestTimeout: defaults.RequestTimeout,
LivezGracePeriod: defaults.LivezGracePeriod,
MinRequestTimeout: defaults.MinRequestTimeout,
ShutdownDelayDuration: defaults.ShutdownDelayDuration,
JSONPatchMaxCopyBytes: defaults.JSONPatchMaxCopyBytes,
MaxRequestBodyBytes: defaults.MaxRequestBodyBytes,
EnablePriorityAndFairness: true,
ShutdownSendRetryAfter: false,
MaxRequestsInFlight: defaults.MaxRequestsInFlight,
MaxMutatingRequestsInFlight: defaults.MaxMutatingRequestsInFlight,
RequestTimeout: defaults.RequestTimeout,
LivezGracePeriod: defaults.LivezGracePeriod,
MinRequestTimeout: defaults.MinRequestTimeout,
ShutdownDelayDuration: defaults.ShutdownDelayDuration,
ShutdownWatchTerminationGracePeriod: defaults.ShutdownWatchTerminationGracePeriod,
JSONPatchMaxCopyBytes: defaults.JSONPatchMaxCopyBytes,
MaxRequestBodyBytes: defaults.MaxRequestBodyBytes,
EnablePriorityAndFairness: true,
ShutdownSendRetryAfter: false,
}
}
@ -97,6 +125,7 @@ func (s *ServerRunOptions) ApplyTo(c *server.Config) error {
c.MaxRequestBodyBytes = s.MaxRequestBodyBytes
c.PublicAddress = s.AdvertiseAddress
c.ShutdownSendRetryAfter = s.ShutdownSendRetryAfter
c.ShutdownWatchTerminationGracePeriod = s.ShutdownWatchTerminationGracePeriod
return nil
}
@ -150,6 +179,10 @@ func (s *ServerRunOptions) Validate() []error {
errors = append(errors, fmt.Errorf("--shutdown-delay-duration can not be negative value"))
}
if s.ShutdownWatchTerminationGracePeriod < 0 {
errors = append(errors, fmt.Errorf("shutdown-watch-termination-grace-period, if provided, can not be a negative value"))
}
if s.JSONPatchMaxCopyBytes < 0 {
errors = append(errors, fmt.Errorf("ServerRunOptions.JSONPatchMaxCopyBytes can not be negative value"))
}
@ -161,6 +194,10 @@ func (s *ServerRunOptions) Validate() []error {
if err := validateHSTSDirectives(s.HSTSDirectives); err != nil {
errors = append(errors, err)
}
if err := validateCorsAllowedOriginList(s.CorsAllowedOriginList); err != nil {
errors = append(errors, err)
}
return errors
}
@ -183,6 +220,57 @@ func validateHSTSDirectives(hstsDirectives []string) error {
return errors.NewAggregate(allErrors)
}
func validateCorsAllowedOriginList(corsAllowedOriginList []string) error {
allErrors := []error{}
validateRegexFn := func(regexpStr string) error {
if _, err := regexp.Compile(regexpStr); err != nil {
return err
}
// the regular expression should pin to the start and end of the host
// in the origin header, this will prevent CVE-2022-1996.
// possible ways it can pin to the start of host in the origin header:
// - match the start of the origin with '^'
// - match what separates the scheme and host with '//' or '://',
// this pins to the start of host in the origin header.
// possible ways it can match the end of the host in the origin header:
// - match the end of the origin with '$'
// - with a capture group that matches the host and port separator '(:|$)'
// We will relax the validation to check if these regex markers
// are present in the user specified expression.
var pinStart, pinEnd bool
for _, prefix := range []string{"^", "//"} {
if strings.Contains(regexpStr, prefix) {
pinStart = true
break
}
}
for _, suffix := range []string{"$", ":"} {
if strings.Contains(regexpStr, suffix) {
pinEnd = true
break
}
}
if !pinStart || !pinEnd {
return fmt.Errorf("regular expression does not pin to start/end of host in the origin header")
}
return nil
}
for _, regexp := range corsAllowedOriginList {
if len(regexp) == 0 {
allErrors = append(allErrors, fmt.Errorf("empty value in --cors-allowed-origins, help: %s", corsAllowedOriginsHelpText))
continue
}
if err := validateRegexFn(regexp); err != nil {
err = fmt.Errorf("--cors-allowed-origins has an invalid regular expression: %v, help: %s", err, corsAllowedOriginsHelpText)
allErrors = append(allErrors, err)
}
}
return errors.NewAggregate(allErrors)
}
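To see why the pinning check matters, compare an anchored and an unanchored expression against a look-alike origin (the hostnames here are hypothetical):
package main
import (
	"fmt"
	"regexp"
)
func main() {
	evil := "https://example.com.attacker.org" // look-alike origin
	good := "https://example.com:8443"

	// unanchored: matches anywhere in the origin, so the look-alike slips through
	loose := regexp.MustCompile(`example\.com`)
	fmt.Println(loose.MatchString(evil), loose.MatchString(good)) // true true

	// pinned: '//' ties the match to the start of the host, '(:|$)' to its end
	pinned := regexp.MustCompile(`//example\.com(:|$)`)
	fmt.Println(pinned.MatchString(evil), pinned.MatchString(good)) // false true
}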
// AddUniversalFlags adds flags for a specific APIServer to the specified FlagSet
func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
// Note: the weird ""+ in below lines seems to be the only way to get gofmt to
@ -194,9 +282,7 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
"will be used. If --bind-address is unspecified, the host's default interface will "+
"be used.")
fs.StringSliceVar(&s.CorsAllowedOriginList, "cors-allowed-origins", s.CorsAllowedOriginList, ""+
"List of allowed origins for CORS, comma separated. An allowed origin can be a regular "+
"expression to support subdomain matching. If this list is empty CORS will not be enabled.")
fs.StringSliceVar(&s.CorsAllowedOriginList, "cors-allowed-origins", s.CorsAllowedOriginList, corsAllowedOriginsHelpText)
fs.StringSliceVar(&s.HSTSDirectives, "strict-transport-security-directives", s.HSTSDirectives, ""+
"List of directives for HSTS, comma separated. If this list is empty, then HSTS directives will not "+
@ -205,11 +291,6 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.ExternalHost, "external-hostname", s.ExternalHost,
"The hostname to use when generating externalized URLs for this master (e.g. Swagger API Docs or OpenID Discovery).")
deprecatedMasterServiceNamespace := metav1.NamespaceDefault
fs.StringVar(&deprecatedMasterServiceNamespace, "master-service-namespace", deprecatedMasterServiceNamespace, ""+
"DEPRECATED: the namespace from which the Kubernetes master services should be injected into pods.")
fs.MarkDeprecated("master-service-namespace", "This flag will be removed in v1.27")
fs.IntVar(&s.MaxRequestsInFlight, "max-requests-inflight", s.MaxRequestsInFlight, ""+
"This and --max-mutating-requests-inflight are summed to determine the server's total concurrency limit "+
"(which must be positive) if --enable-priority-and-fairness is true. "+
@ -257,5 +338,9 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
"during this window all incoming requests will be rejected with a status code 429 and a 'Retry-After' response header, "+
"in addition 'Connection: close' response header is set in order to tear down the TCP connection when idle.")
fs.DurationVar(&s.ShutdownWatchTerminationGracePeriod, "shutdown-watch-termination-grace-period", s.ShutdownWatchTerminationGracePeriod, ""+
"This option, if set, represents the maximum amount of grace period the apiserver will wait "+
"for active watch request(s) to drain during the graceful server shutdown window.")
utilfeature.DefaultMutableFeatureGate.AddFlag(fs)
}
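A rough sketch of exercising these options outside the apiserver binary; the flag names come from this file, while the flag values and flag-set name are arbitrary examples:
package main
import (
	"fmt"
	"time"

	"github.com/spf13/pflag"
	genericoptions "k8s.io/apiserver/pkg/server/options"
)
func main() {
	opts := genericoptions.NewServerRunOptions()
	fs := pflag.NewFlagSet("example", pflag.ContinueOnError)
	opts.AddUniversalFlags(fs)

	// exercise the new shutdown flag and the CORS origin validation
	args := []string{
		"--shutdown-watch-termination-grace-period=30s",
		"--cors-allowed-origins=//example\\.com(:|$)",
	}
	if err := fs.Parse(args); err != nil {
		fmt.Println("parse error:", err)
		return
	}
	if errs := opts.Validate(); len(errs) > 0 {
		fmt.Println("validation errors:", errs)
		return
	}
	fmt.Println("watch drain grace period set:", opts.ShutdownWatchTerminationGracePeriod == 30*time.Second)
}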

View File

@ -154,9 +154,5 @@ func ReadTracingConfiguration(configFilePath string) (*tracingapi.TracingConfigu
if err := runtime.DecodeInto(codecs.UniversalDecoder(), data, internalConfig); err != nil {
return nil, fmt.Errorf("unable to decode tracing configuration data: %v", err)
}
tc := &tracingapi.TracingConfiguration{
Endpoint: internalConfig.Endpoint,
SamplingRatePerMillion: internalConfig.SamplingRatePerMillion,
}
return tc, nil
return &internalConfig.TracingConfiguration, nil
}

View File

@ -0,0 +1,82 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package routes
import (
"fmt"
"net"
"net/http"
"net/http/pprof"
"os"
"path"
)
// DebugSocket installs profiling and debug-flag handlers on a Unix domain socket.
type DebugSocket struct {
path string
mux *http.ServeMux
}
// NewDebugSocket creates a new DebugSocket for the given path.
func NewDebugSocket(path string) *DebugSocket {
return &DebugSocket{
path: path,
mux: http.NewServeMux(),
}
}
// InstallProfiling installs profiling endpoints in the socket.
func (s *DebugSocket) InstallProfiling() {
s.mux.HandleFunc("/debug/pprof", redirectTo("/debug/pprof/"))
s.mux.HandleFunc("/debug/pprof/", pprof.Index)
s.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
s.mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
s.mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
s.mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
}
// InstallDebugFlag installs debug flag endpoints in the socket.
func (s *DebugSocket) InstallDebugFlag(flag string, handler func(http.ResponseWriter, *http.Request)) {
f := DebugFlags{}
s.mux.HandleFunc("/debug/flags", f.Index)
s.mux.HandleFunc("/debug/flags/", f.Index)
url := path.Join("/debug/flags", flag)
s.mux.HandleFunc(url, handler)
f.addFlag(flag)
}
// Run starts the server and waits for stopCh to be closed to close the server.
func (s *DebugSocket) Run(stopCh <-chan struct{}) error {
if err := os.Remove(s.path); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove (%v): %v", s.path, err)
}
l, err := net.Listen("unix", s.path)
if err != nil {
return fmt.Errorf("listen error (%v): %v", s.path, err)
}
defer l.Close()
srv := http.Server{Handler: s.mux}
go func() {
<-stopCh
srv.Close()
}()
return srv.Serve(l)
}
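Because the socket serves plain HTTP with no authn/authz, any local process that can dial it can read the pprof endpoints. A minimal client sketch; the socket path is a placeholder for whatever was passed via --debug-socket-path:
package main
import (
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
)
func main() {
	socketPath := "/run/example-apiserver-debug.sock" // placeholder path

	client := &http.Client{
		Transport: &http.Transport{
			// route every request over the Unix domain socket instead of TCP
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return (&net.Dialer{}).DialContext(ctx, "unix", socketPath)
			},
		},
	}

	// the host part of the URL is ignored by the dialer above
	resp, err := client.Get("http://localhost/debug/pprof/")
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, len(body), "bytes")
}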

View File

@ -24,6 +24,7 @@ import (
builder2 "k8s.io/kube-openapi/pkg/builder"
"k8s.io/kube-openapi/pkg/builder3"
"k8s.io/kube-openapi/pkg/common"
"k8s.io/kube-openapi/pkg/common/restfuladapter"
"k8s.io/kube-openapi/pkg/handler"
"k8s.io/kube-openapi/pkg/handler3"
"k8s.io/kube-openapi/pkg/validation/spec"
@ -36,16 +37,12 @@ type OpenAPI struct {
// Install adds the SwaggerUI webservice to the given mux.
func (oa OpenAPI) InstallV2(c *restful.Container, mux *mux.PathRecorderMux) (*handler.OpenAPIService, *spec.Swagger) {
spec, err := builder2.BuildOpenAPISpec(c.RegisteredWebServices(), oa.Config)
spec, err := builder2.BuildOpenAPISpecFromRoutes(restfuladapter.AdaptWebServices(c.RegisteredWebServices()), oa.Config)
if err != nil {
klog.Fatalf("Failed to build open api spec for root: %v", err)
}
spec.Definitions = handler.PruneDefaults(spec.Definitions)
openAPIVersionedService, err := handler.NewOpenAPIService(spec)
if err != nil {
klog.Fatalf("Failed to create OpenAPIService: %v", err)
}
openAPIVersionedService := handler.NewOpenAPIService(spec)
err = openAPIVersionedService.RegisterOpenAPIVersionedService("/openapi/v2", mux)
if err != nil {
klog.Fatalf("Failed to register versioned open api spec for root: %v", err)
@ -56,12 +53,8 @@ func (oa OpenAPI) InstallV2(c *restful.Container, mux *mux.PathRecorderMux) (*ha
// InstallV3 adds the static group/versions defined in the RegisteredWebServices to the OpenAPI v3 spec
func (oa OpenAPI) InstallV3(c *restful.Container, mux *mux.PathRecorderMux) *handler3.OpenAPIService {
openAPIVersionedService, err := handler3.NewOpenAPIService(nil)
if err != nil {
klog.Fatalf("Failed to create OpenAPIService: %v", err)
}
err = openAPIVersionedService.RegisterOpenAPIV3VersionedService("/openapi/v3", mux)
openAPIVersionedService := handler3.NewOpenAPIService()
err := openAPIVersionedService.RegisterOpenAPIV3VersionedService("/openapi/v3", mux)
if err != nil {
klog.Fatalf("Failed to register versioned open api spec for root: %v", err)
}
@ -75,7 +68,7 @@ func (oa OpenAPI) InstallV3(c *restful.Container, mux *mux.PathRecorderMux) *han
}
for gv, ws := range grouped {
spec, err := builder3.BuildOpenAPISpec(ws, oa.Config)
spec, err := builder3.BuildOpenAPISpecFromRoutes(restfuladapter.AdaptWebServices(ws), oa.Config)
if err != nil {
klog.Errorf("Failed to build OpenAPI v3 for group %s, %q", gv, err)