Mirror of https://github.com/ceph/ceph-csi.git, synced 2025-06-13 10:33:35 +00:00

rebase: update replaced k8s.io modules to v0.33.0

Signed-off-by: Niels de Vos <ndevos@ibm.com>
Committed by: mergify[bot]
Parent: dd77e72800, commit: 107407b44b

e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/cache_watcher.go (generated, vendored), 545 lines
@@ -1,545 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"context"
	"fmt"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/cacher/metrics"
	utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"

	"k8s.io/klog/v2"
)

// possible states of the cache watcher
const (
	// cacheWatcherWaitingForBookmark indicates the cacher
	// is waiting for a bookmark event with a specific RV set
	cacheWatcherWaitingForBookmark = iota

	// cacheWatcherBookmarkReceived indicates that the cacher
	// has received a bookmark event with the required RV
	cacheWatcherBookmarkReceived

	// cacheWatcherBookmarkSent indicates that the cacher
	// has already sent a bookmark event to a client
	cacheWatcherBookmarkSent
)

// cacheWatcher implements watch.Interface
// this is not thread-safe
type cacheWatcher struct {
	input     chan *watchCacheEvent
	result    chan watch.Event
	done      chan struct{}
	filter    filterWithAttrsFunc
	stopped   bool
	forget    func(bool)
	versioner storage.Versioner
	// The watcher will be closed by the server after the deadline,
	// save it here to send bookmark events before that.
	deadline            time.Time
	allowWatchBookmarks bool
	groupResource       schema.GroupResource

	// human-readable identifier that helps associate a cacheWatcher
	// instance with a request
	identifier string

	// drainInputBuffer indicates whether we should delay closing this watcher
	// and send all events in the input buffer.
	drainInputBuffer bool

	// bookmarkAfterResourceVersion holds an RV that indicates
	// when we should start delivering bookmark events.
	// If this field holds the value of 0 that means
	// we don't have any special preferences toward delivering bookmark events.
	// Note that this field is used in conjunction with the state field.
	// It should not be changed once the watcher has been started.
	bookmarkAfterResourceVersion uint64

	// stateMutex protects state
	stateMutex sync.Mutex

	// state holds a numeric value indicating the current state of the watcher
	state int
}

func newCacheWatcher(
	chanSize int,
	filter filterWithAttrsFunc,
	forget func(bool),
	versioner storage.Versioner,
	deadline time.Time,
	allowWatchBookmarks bool,
	groupResource schema.GroupResource,
	identifier string,
) *cacheWatcher {
	return &cacheWatcher{
		input:               make(chan *watchCacheEvent, chanSize),
		result:              make(chan watch.Event, chanSize),
		done:                make(chan struct{}),
		filter:              filter,
		stopped:             false,
		forget:              forget,
		versioner:           versioner,
		deadline:            deadline,
		allowWatchBookmarks: allowWatchBookmarks,
		groupResource:       groupResource,
		identifier:          identifier,
	}
}

// Implements watch.Interface.
func (c *cacheWatcher) ResultChan() <-chan watch.Event {
	return c.result
}

// Implements watch.Interface.
func (c *cacheWatcher) Stop() {
	c.forget(false)
}

// we rely on the fact that stopLocked is actually protected by Cacher.Lock()
func (c *cacheWatcher) stopLocked() {
	if !c.stopped {
		c.stopped = true
		// stop without draining the input channel was requested.
		if !c.drainInputBuffer {
			close(c.done)
		}
		close(c.input)
	}

	// Even if the watcher was already stopped, if it previously was
	// using draining mode and it's not using it now, we need to
	// close the done channel now. Otherwise we could leak the
	// processing goroutine if it keeps trying to put more objects
	// into the result channel: the channel will be full and there will
	// already be no one processing the events on the receiving end.
	if !c.drainInputBuffer && !c.isDoneChannelClosedLocked() {
		close(c.done)
	}
}

func (c *cacheWatcher) nonblockingAdd(event *watchCacheEvent) bool {
	// if the bookmarkAfterResourceVersion hasn't been seen
	// we will try to deliver a bookmark event every second.
	// the following check discards a bookmark event
	// if it is < the bookmarkAfterResourceVersion
	// so that we don't pollute the input channel
	if event.Type == watch.Bookmark && event.ResourceVersion < c.bookmarkAfterResourceVersion {
		return false
	}
	select {
	case c.input <- event:
		c.markBookmarkAfterRvAsReceived(event)
		return true
	default:
		return false
	}
}

// Nil timer means that add will not block (if it can't send the event immediately, it will break the watcher)
//
// Note that bookmark events are never added via the add method, only via nonblockingAdd.
// Changing this behaviour will require moving the markBookmarkAfterRvAsReceived method
func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool {
	// Try to send the event immediately, without blocking.
	if c.nonblockingAdd(event) {
		return true
	}

	closeFunc := func() {
		// This means that we couldn't send the event to that watcher.
		// Since we don't want to block on it infinitely,
		// we simply terminate it.
		metrics.TerminatedWatchersCounter.WithLabelValues(c.groupResource.String()).Inc()

		// we are graceful = false, when:
		//
		// (a) The bookmarkAfterResourceVersion hasn't been received,
		//     so we can safely terminate the watcher. Because the client is waiting
		//     for this specific bookmark, and we haven't even received one.
		// (b) We have seen the bookmarkAfterResourceVersion, and it was already sent to the client.
		//     We can simply terminate the watcher.

		// we are graceful = true, when:
		//
		// (a) We have seen a bookmark, but it hasn't been sent to the client yet.
		//     That means we should drain the input buffer which contains
		//     the bookmarkAfterResourceVersion we want. We do that to make progress
		//     as clients can re-establish a new watch with the given RV and receive
		//     further notifications.
		graceful := func() bool {
			c.stateMutex.Lock()
			defer c.stateMutex.Unlock()
			return c.state == cacheWatcherBookmarkReceived
		}()
		klog.V(1).Infof("Forcing %v watcher close due to unresponsiveness: %v. len(c.input) = %v, len(c.result) = %v, graceful = %v", c.groupResource.String(), c.identifier, len(c.input), len(c.result), graceful)
		c.forget(graceful)
	}

	if timer == nil {
		closeFunc()
		return false
	}

	// OK, block sending, but only until timer fires.
	select {
	case c.input <- event:
		return true
	case <-timer.C:
		closeFunc()
		return false
	}
}

func (c *cacheWatcher) nextBookmarkTime(now time.Time, bookmarkFrequency time.Duration) (time.Time, bool) {
	// We try to send bookmarks:
	//
	// (a) right before the watcher timeout - for now we simply set it 2s before
	//     the deadline
	//
	// (b) roughly every minute
	//
	// (c) immediately when the bookmarkAfterResourceVersion wasn't confirmed
	//     in this scenario the client has already seen (or is in the process of sending)
	//     all initial data and is interested in seeing
	//     a specific RV value (aka. the bookmarkAfterResourceVersion)
	//     since we don't know when the cacher will see the RV, we increase the frequency
	//
	// (b) gives us periodicity if the watch breaks due to unexpected
	// conditions, (a) ensures that on timeout the watcher is as close to
	// now as possible - this covers 99% of cases.

	if !c.wasBookmarkAfterRvReceived() {
		return time.Time{}, true // schedule immediately
	}

	heartbeatTime := now.Add(bookmarkFrequency)
	if c.deadline.IsZero() {
		// Timeout is set by our client libraries (e.g. reflector) as well as defaulted by
		// apiserver if properly configured. So this shouldn't happen in practice.
		return heartbeatTime, true
	}
	if pretimeoutTime := c.deadline.Add(-2 * time.Second); pretimeoutTime.Before(heartbeatTime) {
		heartbeatTime = pretimeoutTime
	}

	if heartbeatTime.Before(now) {
		return time.Time{}, false
	}
	return heartbeatTime, true
}

// wasBookmarkAfterRvReceived is the same as wasBookmarkAfterRvReceivedLocked, it just acquires the lock first
func (c *cacheWatcher) wasBookmarkAfterRvReceived() bool {
	c.stateMutex.Lock()
	defer c.stateMutex.Unlock()
	return c.wasBookmarkAfterRvReceivedLocked()
}

// wasBookmarkAfterRvReceivedLocked checks if the given cacheWatcher
// has seen a bookmark event >= bookmarkAfterResourceVersion
func (c *cacheWatcher) wasBookmarkAfterRvReceivedLocked() bool {
	return c.state != cacheWatcherWaitingForBookmark
}

// markBookmarkAfterRvAsReceived indicates that the given cacheWatcher
// has seen a bookmark event >= bookmarkAfterResourceVersion
func (c *cacheWatcher) markBookmarkAfterRvAsReceived(event *watchCacheEvent) {
	if event.Type == watch.Bookmark {
		c.stateMutex.Lock()
		defer c.stateMutex.Unlock()
		if c.wasBookmarkAfterRvReceivedLocked() {
			return
		}
		// bookmark events are scheduled by the startDispatchingBookmarkEvents method
		// since we received a bookmark event that means we have
		// converged towards the expected RV and it is okay to update the state so that
		// this cacher can be scheduled for regular bookmark events
		c.state = cacheWatcherBookmarkReceived
	}
}

// wasBookmarkAfterRvSentLocked checks if a bookmark event
// with an RV >= the bookmarkAfterResourceVersion has been sent by this watcher
func (c *cacheWatcher) wasBookmarkAfterRvSentLocked() bool {
	return c.state == cacheWatcherBookmarkSent
}

// wasBookmarkAfterRvSent is the same as wasBookmarkAfterRvSentLocked, it just acquires the lock first
func (c *cacheWatcher) wasBookmarkAfterRvSent() bool {
	c.stateMutex.Lock()
	defer c.stateMutex.Unlock()
	return c.wasBookmarkAfterRvSentLocked()
}

// markBookmarkAfterRvSent indicates that the given cacheWatcher
// has sent a bookmark event with an RV >= the bookmarkAfterResourceVersion
//
// this function relies on the fact that the nonblockingAdd method
// won't admit a bookmark event with an RV < the bookmarkAfterResourceVersion
// so the first received bookmark event is considered to match the bookmarkAfterResourceVersion
func (c *cacheWatcher) markBookmarkAfterRvSent(event *watchCacheEvent) {
	// note that bookmark events are not so common, so we will acquire the lock every ~60 seconds or so
	if event.Type == watch.Bookmark {
		c.stateMutex.Lock()
		defer c.stateMutex.Unlock()
		if !c.wasBookmarkAfterRvSentLocked() {
			c.state = cacheWatcherBookmarkSent
		}
	}
}

// setBookmarkAfterResourceVersion sets the bookmarkAfterResourceVersion and the state associated with it
func (c *cacheWatcher) setBookmarkAfterResourceVersion(bookmarkAfterResourceVersion uint64) {
	state := cacheWatcherWaitingForBookmark
	if bookmarkAfterResourceVersion == 0 {
		state = cacheWatcherBookmarkSent // if no specific RV was requested we assume no-op
	}
	c.state = state
	c.bookmarkAfterResourceVersion = bookmarkAfterResourceVersion
}

// setDrainInputBufferLocked, if set to true, indicates that we should delay closing this watcher
// until we send all events residing in the input buffer.
func (c *cacheWatcher) setDrainInputBufferLocked(drain bool) {
	c.drainInputBuffer = drain
}

// isDoneChannelClosedLocked checks if the c.done channel is closed
func (c *cacheWatcher) isDoneChannelClosedLocked() bool {
	select {
	case <-c.done:
		return true
	default:
	}
	return false
}

func getMutableObject(object runtime.Object) runtime.Object {
	if _, ok := object.(*cachingObject); ok {
		// It is safe to return without deep-copy, because the underlying
		// object will lazily perform deep-copy on the first try to change
		// any of its fields.
		return object
	}
	return object.DeepCopyObject()
}

func updateResourceVersion(object runtime.Object, versioner storage.Versioner, resourceVersion uint64) {
	if err := versioner.UpdateObject(object, resourceVersion); err != nil {
		utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", resourceVersion, object, err))
	}
}

func (c *cacheWatcher) convertToWatchEvent(event *watchCacheEvent) *watch.Event {
	if event.Type == watch.Bookmark {
		e := &watch.Event{Type: watch.Bookmark, Object: event.Object.DeepCopyObject()}
		if !c.wasBookmarkAfterRvSent() {
			if err := storage.AnnotateInitialEventsEndBookmark(e.Object); err != nil {
				utilruntime.HandleError(fmt.Errorf("error while accessing object's metadata gr: %v, identifier: %v, obj: %#v, err: %v", c.groupResource, c.identifier, e.Object, err))
				return nil
			}
		}
		return e
	}

	curObjPasses := event.Type != watch.Deleted && c.filter(event.Key, event.ObjLabels, event.ObjFields)
	oldObjPasses := false
	if event.PrevObject != nil {
		oldObjPasses = c.filter(event.Key, event.PrevObjLabels, event.PrevObjFields)
	}
	if !curObjPasses && !oldObjPasses {
		// Watcher is not interested in that object.
		return nil
	}

	switch {
	case curObjPasses && !oldObjPasses:
		return &watch.Event{Type: watch.Added, Object: getMutableObject(event.Object)}
	case curObjPasses && oldObjPasses:
		return &watch.Event{Type: watch.Modified, Object: getMutableObject(event.Object)}
	case !curObjPasses && oldObjPasses:
		// return a delete event with the previous object content, but with the event's resource version
		oldObj := getMutableObject(event.PrevObject)
		// We know that if oldObj is cachingObject (which can only be set via
		// setCachingObjects), its resourceVersion is already set correctly and
		// we don't need to update it. However, since cachingObject efficiently
		// handles noop updates, we avoid this microoptimization here.
		updateResourceVersion(oldObj, c.versioner, event.ResourceVersion)
		return &watch.Event{Type: watch.Deleted, Object: oldObj}
	}

	return nil
}

// NOTE: sendWatchCacheEvent is assumed to not modify <event> !!!
func (c *cacheWatcher) sendWatchCacheEvent(event *watchCacheEvent) {
	watchEvent := c.convertToWatchEvent(event)
	if watchEvent == nil {
		// Watcher is not interested in that object.
		return
	}

	// We need to ensure that if we put event X to the c.result, all
	// previous events were already put into it before, no matter whether
	// c.done is closed or not.
	// Thus we cannot simply select from c.done and c.result and this
	// would give us non-determinism.
	// At the same time, we don't want to block infinitely on putting
	// to c.result, when c.done is already closed.
	//
	// This ensures that with c.done already closed, we at most once go
	// into the next select after this. With that, no matter which
	// statement we choose there, we will deliver only consecutive
	// events.
	select {
	case <-c.done:
		return
	default:
	}

	select {
	case c.result <- *watchEvent:
		c.markBookmarkAfterRvSent(event)
	case <-c.done:
	}
}

func (c *cacheWatcher) processInterval(ctx context.Context, cacheInterval *watchCacheInterval, resourceVersion uint64) {
	defer utilruntime.HandleCrash()
	defer close(c.result)
	defer c.Stop()

	// Check how long we are processing initEvents.
	// As long as these are not processed, we are not processing
	// any incoming events, so if it takes long, we may actually
	// block all watchers for some time.
	// TODO: From the logs it seems that there happen processing
	// times of even up to 1s, which is very long. However, this doesn't
	// depend that much on the number of initEvents. E.g. from the
	// 2000-node Kubemark run we have logs like this, e.g.:
	// ... processing 13862 initEvents took 66.808689ms
	// ... processing 14040 initEvents took 993.532539ms
	// We should understand what is blocking us in those cases (e.g.
	// is it lack of CPU, network, or sth else) and potentially
	// consider increasing the size of the result buffer in those cases.
	const initProcessThreshold = 500 * time.Millisecond
	startTime := time.Now()

	// cacheInterval may be created from a version more fresh than requested
	// (e.g. for the NotOlderThan semantic). In such a case, we need to prevent watch events
	// with a lower resourceVersion from being delivered to ensure the watch contract.
	if cacheInterval.resourceVersion > resourceVersion {
		resourceVersion = cacheInterval.resourceVersion
	}

	initEventCount := 0
	for {
		event, err := cacheInterval.Next()
		if err != nil {
			// An error indicates that the cache interval
			// has been invalidated and can no longer serve
			// events.
			//
			// Initially we considered sending an "out-of-history"
			// Error event in this case, but because historically
			// such events weren't sent out of the watchCache, we
			// decided not to. This is still ok, because on watch
			// closure, the watcher will try to re-instantiate the
			// watch and then will get an explicit "out-of-history"
			// window. There is potential for optimization, but for
			// now, in order to be on the safe side and not break
			// custom clients, the cost of it is something that we
			// are fully accepting.
			klog.Warningf("couldn't retrieve watch event to serve: %#v", err)
			return
		}
		if event == nil {
			break
		}
		c.sendWatchCacheEvent(event)

		// With some events already sent, update resourceVersion so that
		// events that were buffered and not yet processed won't be delivered
		// to this watcher a second time, causing it to go back in time.
		//
		// There is one case where events are not necessarily ordered by
		// resourceVersion: watching from resourceVersion=0,
		// which at the beginning returns the state of each object.
		// For that purpose, we need to max it with the resource version
		// that we have so far.
		if event.ResourceVersion > resourceVersion {
			resourceVersion = event.ResourceVersion
		}
		initEventCount++
	}

	if initEventCount > 0 {
		metrics.InitCounter.WithLabelValues(c.groupResource.String()).Add(float64(initEventCount))
	}
	processingTime := time.Since(startTime)
	if processingTime > initProcessThreshold {
		klog.V(2).Infof("processing %d initEvents of %s (%s) took %v", initEventCount, c.groupResource, c.identifier, processingTime)
	}

	// send a bookmark after sending all events in cacheInterval for a watchlist request
	if cacheInterval.initialEventsEndBookmark != nil {
		c.sendWatchCacheEvent(cacheInterval.initialEventsEndBookmark)
	}
	c.process(ctx, resourceVersion)
}

func (c *cacheWatcher) process(ctx context.Context, resourceVersion uint64) {
	// At this point we already start processing incoming watch events.
	// However, the init events can still be processed because their serialization
	// and sending to the client happens asynchronously.
	// TODO: As described in the KEP, we would like to estimate that by delaying
	// the initialization signal proportionally to the number of events to
	// process, but we're leaving this to the tuning phase.
	utilflowcontrol.WatchInitialized(ctx)

	for {
		select {
		case event, ok := <-c.input:
			if !ok {
				return
			}
			// only send events newer than resourceVersion,
			// or a bookmark event with an RV equal to resourceVersion
			// if we haven't sent one to the client
			if event.ResourceVersion > resourceVersion || (event.Type == watch.Bookmark && event.ResourceVersion == resourceVersion && !c.wasBookmarkAfterRvSent()) {
				c.sendWatchCacheEvent(event)
			}
		case <-ctx.Done():
			return
		}
	}
}
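The add method above combines a non-blocking send with a timer-bounded blocking send: the fast path is a select with a default case, and only when the buffer is full does it block until the timer fires, at which point the watcher is terminated rather than stalling the dispatcher. A minimal, self-contained sketch of that channel pattern (illustrative names, not part of the vendored file):

package main

import (
	"fmt"
	"time"
)

// trySend first attempts a non-blocking send; on a full buffer it blocks
// until either the send succeeds or the timer fires. A nil timer means
// "never block", mirroring the nil-timer contract of cacheWatcher.add.
func trySend(ch chan<- int, v int, timer *time.Timer) bool {
	select {
	case ch <- v: // fast path: buffer has room
		return true
	default:
	}
	if timer == nil {
		return false
	}
	select {
	case ch <- v: // a consumer made room before the deadline
		return true
	case <-timer.C: // consumer too slow: give up instead of blocking forever
		return false
	}
}

func main() {
	ch := make(chan int, 1)
	fmt.Println(trySend(ch, 1, nil)) // true: buffer was empty
	timer := time.NewTimer(50 * time.Millisecond)
	fmt.Println(trySend(ch, 2, timer)) // false: buffer full, nobody reading
}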
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/cacher.go (generated, vendored), 1408 lines
File diff suppressed because it is too large.
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/caching_object.go (generated, vendored), 412 lines
@@ -1,412 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"bytes"
	"fmt"
	"io"
	"reflect"
	"runtime/debug"
	"sync"
	"sync/atomic"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/klog/v2"
)

var _ runtime.CacheableObject = &cachingObject{}

// metaRuntimeInterface implements the runtime.Object and
// metav1.Object interfaces.
type metaRuntimeInterface interface {
	runtime.Object
	metav1.Object
}

// serializationResult captures a result of serialization.
type serializationResult struct {
	// once should be used to ensure serialization is computed once.
	once sync.Once

	// raw is the serialized object.
	raw []byte
	// err is the error from serialization.
	err error
}

// serializationsCache is a type for caching serialization results.
type serializationsCache map[runtime.Identifier]*serializationResult

// cachingObject is an object that is able to cache its serializations
// so that each of those is computed exactly once.
//
// cachingObject implements the metav1.Object interface (accessors for
// all metadata fields).
type cachingObject struct {
	lock sync.RWMutex

	// deepCopied defines whether the object below has already been
	// deep copied. The operation is performed lazily on the first
	// setXxx operation.
	//
	// The lazy deep-copy is useful, as effectively the only
	// case when we set any field is setting ResourceVersion for
	// DELETE events, so in all other cases we can effectively avoid
	// performing any deep copies.
	deepCopied bool

	// Object for which serializations are cached.
	object metaRuntimeInterface

	// serializations is a cache containing the object's serializations.
	// The value stored in atomic.Value is of type serializationsCache.
	// The atomic.Value type is used to allow a fast-path.
	serializations atomic.Value
}

// newCachingObject performs a deep copy of the given object and wraps it
// into a cachingObject.
// An error is returned if it's not possible to cast the object to
// metav1.Object type.
func newCachingObject(object runtime.Object) (*cachingObject, error) {
	if obj, ok := object.(metaRuntimeInterface); ok {
		result := &cachingObject{
			object:     obj,
			deepCopied: false,
		}
		result.serializations.Store(make(serializationsCache))
		return result, nil
	}
	return nil, fmt.Errorf("can't cast object to metav1.Object: %#v", object)
}

func (o *cachingObject) getSerializationResult(id runtime.Identifier) *serializationResult {
	// Fast-path for getting from cache.
	serializations := o.serializations.Load().(serializationsCache)
	if result, exists := serializations[id]; exists {
		return result
	}

	// Slow-path (that may require insert).
	o.lock.Lock()
	defer o.lock.Unlock()

	serializations = o.serializations.Load().(serializationsCache)
	// Check if in the meantime it wasn't inserted.
	if result, exists := serializations[id]; exists {
		return result
	}

	// Insert an entry for <id>. This requires a copy of the existing map.
	newSerializations := make(serializationsCache)
	for k, v := range serializations {
		newSerializations[k] = v
	}
	result := &serializationResult{}
	newSerializations[id] = result
	o.serializations.Store(newSerializations)
	return result
}

// CacheEncode implements the runtime.CacheableObject interface.
// It serializes the object and writes the result to the given io.Writer, trying
// to first use the already cached result, and falling back to the given encode
// function in case of a cache miss.
// It assumes that for a given identifier, the encode function always encodes
// each input object into the same output format.
func (o *cachingObject) CacheEncode(id runtime.Identifier, encode func(runtime.Object, io.Writer) error, w io.Writer) error {
	result := o.getSerializationResult(id)
	result.once.Do(func() {
		buffer := bytes.NewBuffer(nil)
		// TODO(wojtek-t): This is currently making a copy to avoid races
		//   in cases where encoding is making subtle object modifications,
		//   e.g. #82497
		//   Figure out if we can somehow avoid this under some conditions.
		result.err = encode(o.GetObject(), buffer)
		result.raw = buffer.Bytes()
	})
	// Once invoked, fields of serialization will not change.
	if result.err != nil {
		return result.err
	}
	if b, support := w.(runtime.Splice); support {
		b.Splice(result.raw)
		return nil
	}
	_, err := w.Write(result.raw)
	return err
}

// GetObject implements the runtime.CacheableObject interface.
// It returns a deep-copy of the wrapped object to return ownership of it
// to the caller according to the contract of the interface.
func (o *cachingObject) GetObject() runtime.Object {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.DeepCopyObject().(metaRuntimeInterface)
}

// GetObjectKind implements the runtime.Object interface.
func (o *cachingObject) GetObjectKind() schema.ObjectKind {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetObjectKind()
}

// DeepCopyObject implements the runtime.Object interface.
func (o *cachingObject) DeepCopyObject() runtime.Object {
	// DeepCopyObject on cachingObject is not expected to be called anywhere.
	// However, to be on the safe side, we implement it; though given that the
	// cache is only an optimization, we ignore copying it.
	result := &cachingObject{
		deepCopied: true,
	}
	result.serializations.Store(make(serializationsCache))

	o.lock.RLock()
	defer o.lock.RUnlock()
	result.object = o.object.DeepCopyObject().(metaRuntimeInterface)
	return result
}

var (
	invalidationCacheTimestampLock sync.Mutex
	invalidationCacheTimestamp     time.Time
)

// shouldLogCacheInvalidation allows for logging cache-invalidation
// at most once per second (to avoid spamming logs in case of issues).
func shouldLogCacheInvalidation(now time.Time) bool {
	invalidationCacheTimestampLock.Lock()
	defer invalidationCacheTimestampLock.Unlock()
	if invalidationCacheTimestamp.Add(time.Second).Before(now) {
		invalidationCacheTimestamp = now
		return true
	}
	return false
}

func (o *cachingObject) invalidateCacheLocked() {
	if cache, ok := o.serializations.Load().(serializationsCache); ok && len(cache) == 0 {
		return
	}
	// We don't expect cache invalidation to happen - so we want
	// to log the stacktrace to allow debugging if it does happen.
	// OTOH, we don't want to spam logs with it.
	// So we try to log it at most once per second.
	if shouldLogCacheInvalidation(time.Now()) {
		klog.Warningf("Unexpected cache invalidation for %#v\n%s", o.object, string(debug.Stack()))
	}
	o.serializations.Store(make(serializationsCache))
}

// The following functions implement the metav1.Object interface:
// - getters simply delegate to the underlying object
// - setters check if the operation isn't a noop, and if so,
//   invalidate the cache and delegate to the underlying object

func (o *cachingObject) conditionalSet(isNoop func() bool, set func()) {
	if fastPath := func() bool {
		o.lock.RLock()
		defer o.lock.RUnlock()
		return isNoop()
	}(); fastPath {
		return
	}
	o.lock.Lock()
	defer o.lock.Unlock()
	if isNoop() {
		return
	}
	if !o.deepCopied {
		o.object = o.object.DeepCopyObject().(metaRuntimeInterface)
		o.deepCopied = true
	}
	o.invalidateCacheLocked()
	set()
}

func (o *cachingObject) GetNamespace() string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetNamespace()
}
func (o *cachingObject) SetNamespace(namespace string) {
	o.conditionalSet(
		func() bool { return o.object.GetNamespace() == namespace },
		func() { o.object.SetNamespace(namespace) },
	)
}
func (o *cachingObject) GetName() string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetName()
}
func (o *cachingObject) SetName(name string) {
	o.conditionalSet(
		func() bool { return o.object.GetName() == name },
		func() { o.object.SetName(name) },
	)
}
func (o *cachingObject) GetGenerateName() string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetGenerateName()
}
func (o *cachingObject) SetGenerateName(name string) {
	o.conditionalSet(
		func() bool { return o.object.GetGenerateName() == name },
		func() { o.object.SetGenerateName(name) },
	)
}
func (o *cachingObject) GetUID() types.UID {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetUID()
}
func (o *cachingObject) SetUID(uid types.UID) {
	o.conditionalSet(
		func() bool { return o.object.GetUID() == uid },
		func() { o.object.SetUID(uid) },
	)
}
func (o *cachingObject) GetResourceVersion() string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetResourceVersion()
}
func (o *cachingObject) SetResourceVersion(version string) {
	o.conditionalSet(
		func() bool { return o.object.GetResourceVersion() == version },
		func() { o.object.SetResourceVersion(version) },
	)
}
func (o *cachingObject) GetGeneration() int64 {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetGeneration()
}
func (o *cachingObject) SetGeneration(generation int64) {
	o.conditionalSet(
		func() bool { return o.object.GetGeneration() == generation },
		func() { o.object.SetGeneration(generation) },
	)
}
func (o *cachingObject) GetSelfLink() string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetSelfLink()
}
func (o *cachingObject) SetSelfLink(selfLink string) {
	o.conditionalSet(
		func() bool { return o.object.GetSelfLink() == selfLink },
		func() { o.object.SetSelfLink(selfLink) },
	)
}
func (o *cachingObject) GetCreationTimestamp() metav1.Time {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetCreationTimestamp()
}
func (o *cachingObject) SetCreationTimestamp(timestamp metav1.Time) {
	o.conditionalSet(
		func() bool { return o.object.GetCreationTimestamp() == timestamp },
		func() { o.object.SetCreationTimestamp(timestamp) },
	)
}
func (o *cachingObject) GetDeletionTimestamp() *metav1.Time {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetDeletionTimestamp()
}
func (o *cachingObject) SetDeletionTimestamp(timestamp *metav1.Time) {
	o.conditionalSet(
		func() bool { return o.object.GetDeletionTimestamp() == timestamp },
		func() { o.object.SetDeletionTimestamp(timestamp) },
	)
}
func (o *cachingObject) GetDeletionGracePeriodSeconds() *int64 {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetDeletionGracePeriodSeconds()
}
func (o *cachingObject) SetDeletionGracePeriodSeconds(gracePeriodSeconds *int64) {
	o.conditionalSet(
		func() bool { return o.object.GetDeletionGracePeriodSeconds() == gracePeriodSeconds },
		func() { o.object.SetDeletionGracePeriodSeconds(gracePeriodSeconds) },
	)
}
func (o *cachingObject) GetLabels() map[string]string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetLabels()
}
func (o *cachingObject) SetLabels(labels map[string]string) {
	o.conditionalSet(
		func() bool { return reflect.DeepEqual(o.object.GetLabels(), labels) },
		func() { o.object.SetLabels(labels) },
	)
}
func (o *cachingObject) GetAnnotations() map[string]string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetAnnotations()
}
func (o *cachingObject) SetAnnotations(annotations map[string]string) {
	o.conditionalSet(
		func() bool { return reflect.DeepEqual(o.object.GetAnnotations(), annotations) },
		func() { o.object.SetAnnotations(annotations) },
	)
}
func (o *cachingObject) GetFinalizers() []string {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetFinalizers()
}
func (o *cachingObject) SetFinalizers(finalizers []string) {
	o.conditionalSet(
		func() bool { return reflect.DeepEqual(o.object.GetFinalizers(), finalizers) },
		func() { o.object.SetFinalizers(finalizers) },
	)
}
func (o *cachingObject) GetOwnerReferences() []metav1.OwnerReference {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetOwnerReferences()
}
func (o *cachingObject) SetOwnerReferences(references []metav1.OwnerReference) {
	o.conditionalSet(
		func() bool { return reflect.DeepEqual(o.object.GetOwnerReferences(), references) },
		func() { o.object.SetOwnerReferences(references) },
	)
}
func (o *cachingObject) GetManagedFields() []metav1.ManagedFieldsEntry {
	o.lock.RLock()
	defer o.lock.RUnlock()
	return o.object.GetManagedFields()
}
func (o *cachingObject) SetManagedFields(managedFields []metav1.ManagedFieldsEntry) {
	o.conditionalSet(
		func() bool { return reflect.DeepEqual(o.object.GetManagedFields(), managedFields) },
		func() { o.object.SetManagedFields(managedFields) },
	)
}
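getSerializationResult above is a copy-on-write cache: readers go through atomic.Value with no locking, writers copy the whole map before publishing a new snapshot, and sync.Once guarantees each serialization is computed exactly once even under concurrent encoders. A stripped-down sketch of that same pattern (illustrative types, not the vendored implementation):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type result struct {
	once sync.Once
	val  string
}

type cowCache struct {
	mu    sync.Mutex
	store atomic.Value // always holds a map[string]*result snapshot
}

func newCowCache() *cowCache {
	c := &cowCache{}
	c.store.Store(map[string]*result{})
	return c
}

// get returns the entry for id, inserting one via copy-on-write if needed.
func (c *cowCache) get(id string) *result {
	// Fast path: lock-free read of the current map snapshot.
	if r, ok := c.store.Load().(map[string]*result)[id]; ok {
		return r
	}
	// Slow path: copy the map, insert, and publish the new snapshot.
	c.mu.Lock()
	defer c.mu.Unlock()
	m := c.store.Load().(map[string]*result)
	if r, ok := m[id]; ok { // re-check under the lock
		return r
	}
	newM := make(map[string]*result, len(m)+1)
	for k, v := range m {
		newM[k] = v
	}
	r := &result{}
	newM[id] = r
	c.store.Store(newM)
	return r
}

func main() {
	c := newCowCache()
	r := c.get("json")
	r.once.Do(func() { r.val = "serialized-once" }) // computed exactly once
	fmt.Println(c.get("json").val)
}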
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator.go (generated, vendored), 437 lines
@@ -1,437 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"context"
	"fmt"
	"hash"
	"hash/fnv"
	"os"
	"strconv"
	"sync"
	"time"

	"go.opentelemetry.io/otel/attribute"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/apiserver/pkg/audit"
	"k8s.io/apiserver/pkg/features"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/cacher/delegator"
	"k8s.io/apiserver/pkg/storage/cacher/metrics"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/component-base/tracing"
	"k8s.io/klog/v2"
)

var (
	// ConsistencyCheckPeriod is the period of checking consistency between etcd and cache.
	// 5 minutes were proposed to match the default compaction period. It's a magnitude higher than
	// the List latency SLO (30 seconds) and timeout (1 minute).
	ConsistencyCheckPeriod = 5 * time.Minute
	// ConsistencyCheckerEnabled enables the consistency checking mechanism for the cache.
	// Based on the KUBE_WATCHCACHE_CONSISTENCY_CHECKER environment variable.
	ConsistencyCheckerEnabled = false
)

func init() {
	ConsistencyCheckerEnabled, _ = strconv.ParseBool(os.Getenv("KUBE_WATCHCACHE_CONSISTENCY_CHECKER"))
}

func NewCacheDelegator(cacher *Cacher, storage storage.Interface) *CacheDelegator {
	d := &CacheDelegator{
		cacher:  cacher,
		storage: storage,
		stopCh:  make(chan struct{}),
	}
	if ConsistencyCheckerEnabled {
		d.checker = newConsistencyChecker(cacher.resourcePrefix, cacher.newListFunc, cacher, storage)
		d.wg.Add(1)
		go func() {
			defer d.wg.Done()
			d.checker.startChecking(d.stopCh)
		}()
	}
	return d
}

type CacheDelegator struct {
	cacher  *Cacher
	storage storage.Interface
	checker *consistencyChecker

	wg       sync.WaitGroup
	stopOnce sync.Once
	stopCh   chan struct{}
}

var _ storage.Interface = (*CacheDelegator)(nil)

func (c *CacheDelegator) Versioner() storage.Versioner {
	return c.storage.Versioner()
}

func (c *CacheDelegator) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
	return c.storage.Create(ctx, key, obj, out, ttl)
}

func (c *CacheDelegator) GetCurrentResourceVersion(ctx context.Context) (uint64, error) {
	return c.storage.GetCurrentResourceVersion(ctx)
}

func (c *CacheDelegator) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions, validateDeletion storage.ValidateObjectFunc, cachedExistingObject runtime.Object, opts storage.DeleteOptions) error {
	// Ignore the suggestion and try to pass down the current version of the object
	// read from cache.
	if elem, exists, err := c.cacher.watchCache.GetByKey(key); err != nil {
		klog.Errorf("GetByKey returned error: %v", err)
	} else if exists {
		// DeepCopy the object since we modify the resource version when serializing the
		// current object.
		currObj := elem.(*storeElement).Object.DeepCopyObject()
		return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, currObj, opts)
	}
	// If we couldn't get the object, fall back to no-suggestion.
	return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, nil, opts)
}

func (c *CacheDelegator) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
	// if the watch-list feature wasn't set and the resourceVersion is unset,
	// ensure that the rv from which the watch is being served is the latest
	// one. "latest" is ensured by serving the watch from
	// the underlying storage.
	//
	// it should never happen due to our validation, but let's just be super-safe here
	// and disable sendingInitialEvents when the feature wasn't enabled
	if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) && opts.SendInitialEvents != nil {
		opts.SendInitialEvents = nil
	}
	// TODO: we should eventually get rid of this legacy case
	if utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion) && opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
		return c.storage.Watch(ctx, key, opts)
	}
	return c.cacher.Watch(ctx, key, opts)
}

func (c *CacheDelegator) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
	ctx, span := tracing.Start(ctx, "cacher.Get",
		attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)),
		attribute.String("key", key),
		attribute.String("resource-version", opts.ResourceVersion))
	defer span.End(500 * time.Millisecond)
	if opts.ResourceVersion == "" {
		// If resourceVersion is not specified, serve it from the underlying
		// storage (for backward compatibility).
		span.AddEvent("About to Get from underlying storage")
		return c.storage.Get(ctx, key, opts, objPtr)
	}

	if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
		if !c.cacher.Ready() {
			// If the cache is not initialized, the delegator forwards Get requests to storage
			// as described in https://kep.k8s.io/4568
			span.AddEvent("About to Get from underlying storage - cache not initialized")
			return c.storage.Get(ctx, key, opts, objPtr)
		}
	}
	// If resourceVersion is specified, serve it from cache.
	// It's guaranteed that the returned value is at least as
	// fresh as the given resourceVersion.
	getRV, err := c.cacher.versioner.ParseResourceVersion(opts.ResourceVersion)
	if err != nil {
		return err
	}
	// Do not create a trace - it's not for free and there are tons
	// of Get requests. We can add it if it will be really needed.
	if !utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
		if getRV == 0 && !c.cacher.Ready() {
			// If the Cacher is not yet initialized and we don't require any specific
			// minimal resource version, simply forward the request to storage.
			return c.storage.Get(ctx, key, opts, objPtr)
		}
		if err := c.cacher.ready.wait(ctx); err != nil {
			return errors.NewServiceUnavailable(err.Error())
		}
	}
	span.AddEvent("About to fetch object from cache")
	return c.cacher.Get(ctx, key, opts, objPtr)
}

func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
	_, _, err := storage.ValidateListOptions(c.cacher.resourcePrefix, c.cacher.versioner, opts)
	if err != nil {
		return err
	}
	result, err := delegator.ShouldDelegateList(opts, c.cacher)
	if err != nil {
		return err
	}
	if result.ShouldDelegate {
		return c.storage.GetList(ctx, key, opts, listObj)
	}

	listRV, err := c.cacher.versioner.ParseResourceVersion(opts.ResourceVersion)
	if err != nil {
		return err
	}

	if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
		if !c.cacher.Ready() && shouldDelegateListOnNotReadyCache(opts) {
			// If the Cacher is not initialized, the delegator forwards List requests to storage
			// as described in https://kep.k8s.io/4568
			return c.storage.GetList(ctx, key, opts, listObj)
		}
	} else {
		if listRV == 0 && !c.cacher.Ready() {
			// If the Cacher is not yet initialized and we don't require any specific
			// minimal resource version, simply forward the request to storage.
			return c.storage.GetList(ctx, key, opts, listObj)
		}
	}
	if result.ConsistentRead {
		listRV, err = c.storage.GetCurrentResourceVersion(ctx)
		if err != nil {
			return err
		}
		// Set the resource version for the consistent read from the cache based on the current ResourceVersion in etcd.
		opts.ResourceVersion = strconv.FormatInt(int64(listRV), 10)
	}
	err = c.cacher.GetList(ctx, key, opts, listObj)
	success := "true"
	fallback := "false"
	if err != nil {
		if errors.IsResourceExpired(err) {
			return c.storage.GetList(ctx, key, opts, listObj)
		}
		if result.ConsistentRead {
			if storage.IsTooLargeResourceVersion(err) {
				fallback = "true"
				// Reset the resourceVersion during fallback from consistent read.
				opts.ResourceVersion = ""
				err = c.storage.GetList(ctx, key, opts, listObj)
			}
			if err != nil {
				success = "false"
			}
			metrics.ConsistentReadTotal.WithLabelValues(c.cacher.resourcePrefix, success, fallback).Add(1)
		}
		return err
	}
	if result.ConsistentRead {
		metrics.ConsistentReadTotal.WithLabelValues(c.cacher.resourcePrefix, success, fallback).Add(1)
	}
	return nil
}

func shouldDelegateListOnNotReadyCache(opts storage.ListOptions) bool {
	pred := opts.Predicate
	noLabelSelector := pred.Label == nil || pred.Label.Empty()
	noFieldSelector := pred.Field == nil || pred.Field.Empty()
	hasLimit := pred.Limit > 0
	return noLabelSelector && noFieldSelector && hasLimit
}

func (c *CacheDelegator) GuaranteedUpdate(ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool, preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, cachedExistingObject runtime.Object) error {
	// Ignore the suggestion and try to pass down the current version of the object
	// read from cache.
	if elem, exists, err := c.cacher.watchCache.GetByKey(key); err != nil {
		klog.Errorf("GetByKey returned error: %v", err)
	} else if exists {
		// DeepCopy the object since we modify the resource version when serializing the
		// current object.
		currObj := elem.(*storeElement).Object.DeepCopyObject()
		return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, currObj)
	}
	// If we couldn't get the object, fall back to no-suggestion.
	return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, nil)
}

func (c *CacheDelegator) Count(pathPrefix string) (int64, error) {
	return c.storage.Count(pathPrefix)
}

func (c *CacheDelegator) ReadinessCheck() error {
	if !c.cacher.Ready() {
		return storage.ErrStorageNotReady
	}
	return nil
}

func (c *CacheDelegator) RequestWatchProgress(ctx context.Context) error {
	return c.storage.RequestWatchProgress(ctx)
}

func (c *CacheDelegator) Stop() {
	c.stopOnce.Do(func() {
		close(c.stopCh)
	})
	c.wg.Wait()
}

func newConsistencyChecker(resourcePrefix string, newListFunc func() runtime.Object, cacher getListerReady, etcd getLister) *consistencyChecker {
	return &consistencyChecker{
		resourcePrefix: resourcePrefix,
		newListFunc:    newListFunc,
		cacher:         cacher,
		etcd:           etcd,
	}
}

type consistencyChecker struct {
	resourcePrefix string
	newListFunc    func() runtime.Object

	cacher getListerReady
	etcd   getLister
}

type getListerReady interface {
	getLister
	Ready() bool
}

type getLister interface {
	GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error
}

func (c consistencyChecker) startChecking(stopCh <-chan struct{}) {
	err := wait.PollUntilContextCancel(wait.ContextForChannel(stopCh), ConsistencyCheckPeriod, false, func(ctx context.Context) (done bool, err error) {
		c.check(ctx)
		return false, nil
	})
	if err != nil {
		klog.InfoS("Cache consistency check exiting", "resource", c.resourcePrefix, "err", err)
	}
}

func (c *consistencyChecker) check(ctx context.Context) {
	digests, err := c.calculateDigests(ctx)
	if err != nil {
		klog.ErrorS(err, "Cache consistency check error", "resource", c.resourcePrefix)
		metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "error").Inc()
		return
	}
	if digests.CacheDigest == digests.EtcdDigest {
		klog.V(3).InfoS("Cache consistency check passed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "digest", digests.CacheDigest)
		metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "success").Inc()
	} else {
		klog.ErrorS(nil, "Cache consistency check failed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "etcdDigest", digests.EtcdDigest, "cacheDigest", digests.CacheDigest)
		metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "failure").Inc()
		// Panic on a failed internal consistency check; enabled only by the environment variable.
		panic(fmt.Sprintf("Cache consistency check failed, resource: %q, resourceVersion: %q, etcdDigest: %q, cacheDigest: %q", c.resourcePrefix, digests.ResourceVersion, digests.EtcdDigest, digests.CacheDigest))
	}
}

func (c *consistencyChecker) calculateDigests(ctx context.Context) (*storageDigest, error) {
	if !c.cacher.Ready() {
		return nil, fmt.Errorf("cache is not ready")
	}
	cacheDigest, resourceVersion, err := c.calculateStoreDigest(ctx, c.cacher, storage.ListOptions{
		Recursive:            true,
		ResourceVersion:      "0",
		Predicate:            storage.Everything,
		ResourceVersionMatch: metav1.ResourceVersionMatchNotOlderThan,
	})
	if err != nil {
		return nil, fmt.Errorf("failed calculating cache digest: %w", err)
	}
	etcdDigest, _, err := c.calculateStoreDigest(ctx, c.etcd, storage.ListOptions{
		Recursive:            true,
		ResourceVersion:      resourceVersion,
		Predicate:            storage.Everything,
		ResourceVersionMatch: metav1.ResourceVersionMatchExact,
	})
	if err != nil {
		return nil, fmt.Errorf("failed calculating etcd digest: %w", err)
	}
	return &storageDigest{
		ResourceVersion: resourceVersion,
		CacheDigest:     cacheDigest,
		EtcdDigest:      etcdDigest,
	}, nil
}

type storageDigest struct {
	ResourceVersion string
	CacheDigest     string
	EtcdDigest      string
}

func (c *consistencyChecker) calculateStoreDigest(ctx context.Context, store getLister, opts storage.ListOptions) (digest, rv string, err error) {
	// TODO: Implement pagination
	resp := c.newListFunc()
	err = store.GetList(ctx, c.resourcePrefix, opts, resp)
	if err != nil {
		return "", "", err
	}
	digest, err = listDigest(resp)
	if err != nil {
		return "", "", err
	}
	list, err := meta.ListAccessor(resp)
	if err != nil {
		return "", "", err
	}
	return digest, list.GetResourceVersion(), nil
}

func listDigest(list runtime.Object) (string, error) {
	h := fnv.New64()
	err := meta.EachListItem(list, func(obj runtime.Object) error {
		objectMeta, err := meta.Accessor(obj)
		if err != nil {
			return err
		}
		err = addObjectToDigest(h, objectMeta)
		if err != nil {
			return err
		}
		return nil
	})
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum64()), nil
}

func addObjectToDigest(h hash.Hash64, objectMeta metav1.Object) error {
	_, err := h.Write([]byte(objectMeta.GetNamespace()))
	if err != nil {
		return err
	}
	_, err = h.Write([]byte("/"))
	if err != nil {
		return err
	}
	_, err = h.Write([]byte(objectMeta.GetName()))
	if err != nil {
		return err
	}
	_, err = h.Write([]byte("/"))
	if err != nil {
		return err
	}
	_, err = h.Write([]byte(objectMeta.GetResourceVersion()))
	if err != nil {
		return err
	}
	return nil
}
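The consistency checker above compares an FNV-64 digest of namespace/name/resourceVersion for every object listed from the cache against the digest of the same listing served by etcd at the same resource version; equal digests mean the cache has converged. A self-contained sketch of that digest computation (an illustrative struct stands in for metav1.Object; in practice hash.Hash writes never fail, which is why the sketch can ignore the error the vendored code still checks):

package main

import (
	"fmt"
	"hash/fnv"
)

type objMeta struct{ namespace, name, rv string }

// digest hashes the namespace/name/resourceVersion of every item,
// mirroring the shape of listDigest/addObjectToDigest above.
func digest(items []objMeta) string {
	h := fnv.New64()
	for _, o := range items {
		h.Write([]byte(o.namespace))
		h.Write([]byte("/"))
		h.Write([]byte(o.name))
		h.Write([]byte("/"))
		h.Write([]byte(o.rv))
	}
	return fmt.Sprintf("%x", h.Sum64())
}

func main() {
	a := []objMeta{{"default", "pod-a", "101"}, {"default", "pod-b", "102"}}
	b := []objMeta{{"default", "pod-a", "101"}, {"default", "pod-b", "103"}}
	fmt.Println(digest(a) == digest(a)) // true: same content, same digest
	fmt.Println(digest(a) == digest(b)) // false: one resourceVersion differs
}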
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator/interface.go (generated, vendored), 113 lines
@ -1,113 +0,0 @@
|
||||
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package delegator

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apiserver/pkg/features"
	"k8s.io/apiserver/pkg/storage"
	etcdfeature "k8s.io/apiserver/pkg/storage/feature"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
)

func ShouldDelegateListMeta(opts *metav1.ListOptions, cache Helper) (Result, error) {
	return ShouldDelegateList(
		storage.ListOptions{
			ResourceVersionMatch: opts.ResourceVersionMatch,
			ResourceVersion:      opts.ResourceVersion,
			Predicate: storage.SelectionPredicate{
				Continue: opts.Continue,
				Limit:    opts.Limit,
			},
			Recursive: true,
		}, cache)
}

func ShouldDelegateList(opts storage.ListOptions, cache Helper) (Result, error) {
	// see https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list
	switch opts.ResourceVersionMatch {
	case metav1.ResourceVersionMatchExact:
		return cache.ShouldDelegateExactRV(opts.ResourceVersion, opts.Recursive)
	case metav1.ResourceVersionMatchNotOlderThan:
		return Result{ShouldDelegate: false}, nil
	case "":
		// Continue
		if len(opts.Predicate.Continue) > 0 {
			return cache.ShouldDelegateContinue(opts.Predicate.Continue, opts.Recursive)
		}
		// Legacy exact match
		if opts.Predicate.Limit > 0 && len(opts.ResourceVersion) > 0 && opts.ResourceVersion != "0" {
			return cache.ShouldDelegateExactRV(opts.ResourceVersion, opts.Recursive)
		}
		// Consistent Read
		if opts.ResourceVersion == "" {
			return cache.ShouldDelegateConsistentRead()
		}
		return Result{ShouldDelegate: false}, nil
	default:
		return Result{ShouldDelegate: true}, nil
	}
}

type Helper interface {
	ShouldDelegateExactRV(rv string, recursive bool) (Result, error)
	ShouldDelegateContinue(continueToken string, recursive bool) (Result, error)
	ShouldDelegateConsistentRead() (Result, error)
}

// Result of delegator decision.
type Result struct {
	// Whether a request cannot be served by cache and should be delegated to etcd.
	ShouldDelegate bool
	// Whether a request is a consistent read, used by delegator to decide if it should call GetCurrentResourceVersion to get RV.
	// Included in interface as only cacher has keyPrefix needed to parse continue token.
	ConsistentRead bool
}

type CacheWithoutSnapshots struct{}

var _ Helper = CacheWithoutSnapshots{}

func (c CacheWithoutSnapshots) ShouldDelegateContinue(continueToken string, recursive bool) (Result, error) {
	return Result{
		ShouldDelegate: true,
		// Continue with negative RV is considered a consistent read, however the token cannot be parsed without keyPrefix, which is unavailable in staging/src/k8s.io/apiserver/pkg/util/flow_control/request/list_work_estimator.go.
		ConsistentRead: false,
	}, nil
}

func (c CacheWithoutSnapshots) ShouldDelegateExactRV(rv string, recursive bool) (Result, error) {
	return Result{
		ShouldDelegate: true,
		ConsistentRead: false,
	}, nil
}

func (c CacheWithoutSnapshots) ShouldDelegateConsistentRead() (Result, error) {
	return Result{
		ShouldDelegate: !ConsistentReadSupported(),
		ConsistentRead: true,
	}, nil
}

// ConsistentReadSupported returns whether cache can be used to serve reads with RV not yet observed by cache, including consistent reads.
// Function is located here to avoid import cycles between staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go and staging/src/k8s.io/apiserver/pkg/util/flow_control/request/list_work_estimator.go.
func ConsistentReadSupported() bool {
	consistentListFromCacheEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
	requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
	return consistentListFromCacheEnabled && requestWatchProgressSupported
}
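A self-contained sketch of the decision table that ShouldDelegateList encodes (the types here are simplified stand-ins, not the vendored storage.ListOptions API): exact-RV and continuation requests need a snapshot, not-older-than is always cacheable, and an empty RV is a consistent read that stays in the cache only when consistent reads are supported.

package main

import "fmt"

// listOpts is an illustrative stand-in for storage.ListOptions.
type listOpts struct {
	Match, RV, Continue string
	Limit               int64
}

func shouldDelegate(o listOpts, snapshotsSupported, consistentReadSupported bool) bool {
	switch o.Match {
	case "Exact":
		return !snapshotsSupported
	case "NotOlderThan":
		return false // the cache can always wait until it is at least this fresh
	case "":
		if o.Continue != "" { // continuation tokens need a snapshot
			return !snapshotsSupported
		}
		if o.Limit > 0 && o.RV != "" && o.RV != "0" { // legacy exact match
			return !snapshotsSupported
		}
		if o.RV == "" { // consistent read
			return !consistentReadSupported
		}
		return false
	default:
		return true // unknown match semantics: fall back to etcd
	}
}

func main() {
	fmt.Println(shouldDelegate(listOpts{Match: "Exact", RV: "42"}, false, true)) // true: no snapshots
	fmt.Println(shouldDelegate(listOpts{RV: "0"}, false, true))                  // false: served from cache
}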
89
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/lister_watcher.go
generated
vendored
@ -1,89 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"context"

	"google.golang.org/grpc/metadata"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/client-go/tools/cache"
)

// listerWatcher wraps storage.Interface to expose cache.ListerWatcher.
type listerWatcher struct {
	storage         storage.Interface
	resourcePrefix  string
	newListFunc     func() runtime.Object
	contextMetadata metadata.MD
}

// NewListerWatcher returns a storage.Interface backed ListerWatcher.
func NewListerWatcher(storage storage.Interface, resourcePrefix string, newListFunc func() runtime.Object, contextMetadata metadata.MD) cache.ListerWatcher {
	return &listerWatcher{
		storage:         storage,
		resourcePrefix:  resourcePrefix,
		newListFunc:     newListFunc,
		contextMetadata: contextMetadata,
	}
}

// Implements cache.ListerWatcher interface.
func (lw *listerWatcher) List(options metav1.ListOptions) (runtime.Object, error) {
	list := lw.newListFunc()
	pred := storage.SelectionPredicate{
		Label:    labels.Everything(),
		Field:    fields.Everything(),
		Limit:    options.Limit,
		Continue: options.Continue,
	}

	storageOpts := storage.ListOptions{
		ResourceVersionMatch: options.ResourceVersionMatch,
		Predicate:            pred,
		Recursive:            true,
	}
	ctx := context.Background()
	if lw.contextMetadata != nil {
		ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
	}
	if err := lw.storage.GetList(ctx, lw.resourcePrefix, storageOpts, list); err != nil {
		return nil, err
	}
	return list, nil
}

// Implements cache.ListerWatcher interface.
func (lw *listerWatcher) Watch(options metav1.ListOptions) (watch.Interface, error) {
	opts := storage.ListOptions{
		ResourceVersion: options.ResourceVersion,
		Predicate:       storage.Everything,
		Recursive:       true,
		ProgressNotify:  true,
	}
	ctx := context.Background()
	if lw.contextMetadata != nil {
		ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
	}
	return lw.storage.Watch(ctx, lw.resourcePrefix, opts)
}
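As a usage sketch, the returned cache.ListerWatcher is the shape a client-go Reflector consumes. This is not part of the vendored file; the storage backend, list constructor, and expected type are all assumed to be supplied by the caller.

// Sketch only: how a storage-backed ListerWatcher typically feeds a
// client-go Reflector. All parameters are assumptions supplied by the caller.
func runReflector(s storage.Interface, newListFunc func() runtime.Object, expected runtime.Object, stopCh <-chan struct{}) {
	lw := NewListerWatcher(s, "/registry/pods", newListFunc, nil)
	store := cache.NewStore(cache.MetaNamespaceKeyFunc)
	r := cache.NewReflector(lw, expected, store, 0)
	r.Run(stopCh) // blocks, relisting and re-watching until stopCh closes
}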
8
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/metrics/OWNERS
generated
vendored
@ -1,8 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
  - sig-instrumentation-approvers
reviewers:
  - sig-instrumentation-reviewers
labels:
  - sig/instrumentation
233
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/metrics/metrics.go
generated
vendored
@ -1,233 +0,0 @@
/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"sync"

	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

const (
	namespace = "apiserver"
	subsystem = "watch_cache"
)

/*
 * By default, all the following metrics are defined as falling under
 * ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	listCacheCount = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "cache_list_total",
			Help:           "Number of LIST requests served from watch cache",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource_prefix", "index"},
	)
	listCacheNumFetched = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "cache_list_fetched_objects_total",
			Help:           "Number of objects read from watch cache in the course of serving a LIST request",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource_prefix", "index"},
	)
	listCacheNumReturned = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "cache_list_returned_objects_total",
			Help:           "Number of objects returned for a LIST request from watch cache",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource_prefix"},
	)
	InitCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "init_events_total",
			Help:           "Counter of init events processed in watch cache broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	EventsReceivedCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "events_received_total",
			Help:           "Counter of events received in watch cache broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	EventsCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "events_dispatched_total",
			Help:           "Counter of events dispatched in watch cache broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	TerminatedWatchersCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "terminated_watchers_total",
			Help:           "Counter of watchers closed due to unresponsiveness broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	watchCacheResourceVersion = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "resource_version",
			Help:           "Current resource version of watch cache broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      subsystem,
			Name:           "capacity_increase_total",
			Help:           "Total number of watch cache capacity increase events broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	watchCacheCapacityDecreaseTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      subsystem,
			Name:           "capacity_decrease_total",
			Help:           "Total number of watch cache capacity decrease events broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	WatchCacheCapacity = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      subsystem,
			Name:           "capacity",
			Help:           "Total capacity of watch cache broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	WatchCacheInitializations = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "initializations_total",
			Help:           "Counter of watch cache initializations broken by resource type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)

	WatchCacheReadWait = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "read_wait_seconds",
			Help:           "Histogram of time spent waiting for a watch cache to become fresh.",
			StabilityLevel: compbasemetrics.ALPHA,
			Buckets:        []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3},
		}, []string{"resource"})

	ConsistentReadTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "consistent_read_total",
			Help:           "Counter for consistent reads from cache.",
			StabilityLevel: compbasemetrics.ALPHA,
		}, []string{"resource", "success", "fallback"})

	StorageConsistencyCheckTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      namespace,
			Name:           "storage_consistency_checks_total",
			Help:           "Counter for status of consistency checks between etcd and watch cache",
			StabilityLevel: compbasemetrics.INTERNAL,
		}, []string{"resource", "status"})
)

var registerMetrics sync.Once

// Register all metrics.
func Register() {
	// Register the metrics.
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(listCacheCount)
		legacyregistry.MustRegister(listCacheNumFetched)
		legacyregistry.MustRegister(listCacheNumReturned)
		legacyregistry.MustRegister(InitCounter)
		legacyregistry.MustRegister(EventsReceivedCounter)
		legacyregistry.MustRegister(EventsCounter)
		legacyregistry.MustRegister(TerminatedWatchersCounter)
		legacyregistry.MustRegister(watchCacheResourceVersion)
		legacyregistry.MustRegister(watchCacheCapacityIncreaseTotal)
		legacyregistry.MustRegister(watchCacheCapacityDecreaseTotal)
		legacyregistry.MustRegister(WatchCacheCapacity)
		legacyregistry.MustRegister(WatchCacheInitializations)
		legacyregistry.MustRegister(WatchCacheReadWait)
		legacyregistry.MustRegister(ConsistentReadTotal)
		legacyregistry.MustRegister(StorageConsistencyCheckTotal)
	})
}

// RecordListCacheMetrics notes various metrics of the cost to serve a LIST request
func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numReturned int) {
	listCacheCount.WithLabelValues(resourcePrefix, indexName).Inc()
	listCacheNumFetched.WithLabelValues(resourcePrefix, indexName).Add(float64(numFetched))
	listCacheNumReturned.WithLabelValues(resourcePrefix).Add(float64(numReturned))
}

// RecordResourceVersion sets the current resource version for a given resource type.
func RecordResourceVersion(resourcePrefix string, resourceVersion uint64) {
	watchCacheResourceVersion.WithLabelValues(resourcePrefix).Set(float64(resourceVersion))
}

// RecordsWatchCacheCapacityChange records watchCache capacity resize (increase or decrease) operations.
func RecordsWatchCacheCapacityChange(objType string, old, new int) {
	WatchCacheCapacity.WithLabelValues(objType).Set(float64(new))
	if old < new {
		watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc()
		return
	}
	watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc()
}
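A short usage sketch, assuming a call site inside the apiserver (the resource prefix and counts below are made-up illustrative values). Register is idempotent via sync.Once, after which the exported record helpers can be called freely.

// Hypothetical call site; values are illustrative only.
func recordExampleListMetrics() {
	Register() // safe to call repeatedly; guarded by sync.Once
	// A LIST over /registry/pods that scanned 500 cached objects and returned 50:
	RecordListCacheMetrics("/registry/pods", "", 500, 50)
	RecordResourceVersion("/registry/pods", 123456)
	RecordsWatchCacheCapacityChange("pods", 1024, 2048) // counted as an increase
}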
129
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/progress/watch_progress.go
generated
vendored
@ -1,129 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package progress

import (
	"context"
	"sync"
	"time"

	"google.golang.org/grpc/metadata"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"

	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

const (
	// progressRequestPeriod determines period of requesting progress
	// from etcd when there is a request waiting for watch cache to be fresh.
	progressRequestPeriod = 100 * time.Millisecond
)

func NewConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory, contextMetadata metadata.MD) *ConditionalProgressRequester {
	pr := &ConditionalProgressRequester{
		clock:                clock,
		requestWatchProgress: requestWatchProgress,
		contextMetadata:      contextMetadata,
	}
	pr.cond = sync.NewCond(&pr.mux)
	return pr
}

type WatchProgressRequester func(ctx context.Context) error

type TickerFactory interface {
	NewTimer(time.Duration) clock.Timer
}

// ConditionalProgressRequester will request progress notification if there
// is a request waiting for watch cache to be fresh.
type ConditionalProgressRequester struct {
	clock                TickerFactory
	requestWatchProgress WatchProgressRequester
	contextMetadata      metadata.MD

	mux     sync.Mutex
	cond    *sync.Cond
	waiting int
	stopped bool
}

func (pr *ConditionalProgressRequester) Run(stopCh <-chan struct{}) {
	ctx := wait.ContextForChannel(stopCh)
	if pr.contextMetadata != nil {
		ctx = metadata.NewOutgoingContext(ctx, pr.contextMetadata)
	}
	go func() {
		defer utilruntime.HandleCrash()
		<-stopCh
		pr.mux.Lock()
		defer pr.mux.Unlock()
		pr.stopped = true
		pr.cond.Signal()
	}()
	timer := pr.clock.NewTimer(progressRequestPeriod)
	defer timer.Stop()
	for {
		stopped := func() bool {
			pr.mux.Lock()
			defer pr.mux.Unlock()
			for pr.waiting == 0 && !pr.stopped {
				pr.cond.Wait()
			}
			return pr.stopped
		}()
		if stopped {
			return
		}

		select {
		case <-timer.C():
			shouldRequest := func() bool {
				pr.mux.Lock()
				defer pr.mux.Unlock()
				return pr.waiting > 0 && !pr.stopped
			}()
			if !shouldRequest {
				timer.Reset(0)
				continue
			}
			timer.Reset(progressRequestPeriod)
			err := pr.requestWatchProgress(ctx)
			if err != nil {
				klog.V(4).InfoS("Error requesting bookmark", "err", err)
			}
		case <-stopCh:
			return
		}
	}
}

func (pr *ConditionalProgressRequester) Add() {
	pr.mux.Lock()
	defer pr.mux.Unlock()
	pr.waiting += 1
	pr.cond.Signal()
}

func (pr *ConditionalProgressRequester) Remove() {
	pr.mux.Lock()
	defer pr.mux.Unlock()
	pr.waiting -= 1
}
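A usage sketch: the requestProgress closure below is a stand-in for the etcd client call that is wired in elsewhere. While at least one waiter is registered via Add, the requester asks etcd for a progress notification every progressRequestPeriod; once the count drops to zero it goes back to sleep on the condition variable.

// Sketch only: clock.RealClock satisfies the TickerFactory interface above.
func exampleProgressRequester(requestProgress WatchProgressRequester, stopCh <-chan struct{}) {
	pr := NewConditionalProgressRequester(requestProgress, clock.RealClock{}, nil)
	go pr.Run(stopCh)

	pr.Add() // a read is now waiting for the cache to become fresh
	// ... wait for the cache to observe the required resource version ...
	pr.Remove() // periodic progress requests stop once no one is waiting
}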
195
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/ready.go
generated
vendored
@ -1,195 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"context"
	"fmt"
	"sync"
	"time"

	"k8s.io/utils/clock"
)

type status int

const (
	Pending status = iota
	Ready
	Stopped
)

// ready is a three-state condition variable that blocks until it is Ready if it is not Stopped.
// Its initial state is Pending and its state machine diagram is as follows.
//
//	Pending <------> Ready -----> Stopped
//	   |                            ^
//	   └----------------------------┘
type ready struct {
	state       status // represent the state of the variable
	lastErr     error
	generation  int          // represent the number of times we have transitioned to ready
	lock        sync.RWMutex // protect the state and generation variables
	restartLock sync.Mutex   // protect the transition from ready to pending where the channel is recreated
	waitCh      chan struct{} // blocks until is ready or stopped

	clock               clock.Clock
	lastStateChangeTime time.Time
}

func newReady(c clock.Clock) *ready {
	r := &ready{
		waitCh: make(chan struct{}),
		state:  Pending,
		clock:  c,
	}
	r.updateLastStateChangeTimeLocked()

	return r
}

// done close the channel once the state is Ready or Stopped
func (r *ready) done() chan struct{} {
	r.restartLock.Lock()
	defer r.restartLock.Unlock()
	return r.waitCh
}

// wait blocks until it is Ready or Stopped, it returns an error if it is Stopped.
func (r *ready) wait(ctx context.Context) error {
	_, err := r.waitAndReadGeneration(ctx)
	return err
}

// waitAndReadGeneration blocks until it is Ready or Stopped and returns number
// of times we entered ready state if Ready and error otherwise.
func (r *ready) waitAndReadGeneration(ctx context.Context) (int, error) {
	for {
		// r.done() only blocks if state is Pending
		select {
		case <-ctx.Done():
			return 0, ctx.Err()
		case <-r.done():
		}

		r.lock.RLock()
		if r.state == Pending {
			// since we allow to switch between the states Pending and Ready
			// if there is a quick transition from Pending -> Ready -> Pending
			// a process that was waiting can get unblocked and see a Pending
			// state again. If the state is Pending we have to wait again to
			// avoid an inconsistent state on the system, with some processes not
			// waiting despite the state moved back to Pending.
			r.lock.RUnlock()
			continue
		}
		generation, err := r.readGenerationLocked()
		r.lock.RUnlock()
		return generation, err
	}
}

// check returns the time elapsed since the state was last changed and the current value.
func (r *ready) check() (time.Duration, error) {
	_, elapsed, err := r.checkAndReadGeneration()
	return elapsed, err
}

// checkAndReadGeneration returns the current generation, the time elapsed since the state was last changed and the current value.
func (r *ready) checkAndReadGeneration() (int, time.Duration, error) {
	r.lock.RLock()
	defer r.lock.RUnlock()
	generation, err := r.readGenerationLocked()
	return generation, r.clock.Since(r.lastStateChangeTime), err
}

func (r *ready) readGenerationLocked() (int, error) {
	switch r.state {
	case Pending:
		if r.lastErr == nil {
			return 0, fmt.Errorf("storage is (re)initializing")
		} else {
			return 0, fmt.Errorf("storage is (re)initializing: %w", r.lastErr)
		}
	case Ready:
		return r.generation, nil
	case Stopped:
		return 0, fmt.Errorf("apiserver cacher is stopped")
	default:
		return 0, fmt.Errorf("unexpected apiserver cache state: %v", r.state)
	}
}

func (r *ready) setReady() {
	r.set(true, nil)
}

func (r *ready) setError(err error) {
	r.set(false, err)
}

// set the state to Pending (false) or Ready (true); it has no effect if the state is Stopped.
func (r *ready) set(ok bool, err error) {
	r.lock.Lock()
	defer r.lock.Unlock()
	if r.state == Stopped {
		return
	}
	r.lastErr = err
	if ok && r.state == Pending {
		r.state = Ready
		r.generation++
		r.updateLastStateChangeTimeLocked()
		select {
		case <-r.waitCh:
		default:
			close(r.waitCh)
		}
	} else if !ok && r.state == Ready {
		// creating the waitCh can be racy if
		// something enters the wait() method
		select {
		case <-r.waitCh:
			r.restartLock.Lock()
			r.waitCh = make(chan struct{})
			r.restartLock.Unlock()
		default:
		}
		r.state = Pending
		r.updateLastStateChangeTimeLocked()
	}
}

// stop the condition variable and set it as Stopped. This state is irreversible.
func (r *ready) stop() {
	r.lock.Lock()
	defer r.lock.Unlock()
	if r.state != Stopped {
		r.state = Stopped
		r.updateLastStateChangeTimeLocked()
	}
	select {
	case <-r.waitCh:
	default:
		close(r.waitCh)
	}
}

func (r *ready) updateLastStateChangeTimeLocked() {
	r.lastStateChangeTime = r.clock.Now()
}
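The Pending<->Ready channel trick above (close waitCh to release every waiter, swap in a fresh channel when dropping back to Pending) can be demonstrated with a stdlib-only sketch; the gate type below is a minimal illustration, not the vendored implementation.

package main

import (
	"fmt"
	"sync"
)

// gate is a minimal sketch of the Pending<->Ready mechanics used by ready:
// waiters block on the current channel; becoming ready closes it, and
// becoming pending again swaps in a fresh, open channel.
type gate struct {
	mu sync.Mutex
	ch chan struct{}
}

func newGate() *gate { return &gate{ch: make(chan struct{})} }

func (g *gate) done() chan struct{} {
	g.mu.Lock()
	defer g.mu.Unlock()
	return g.ch
}

func (g *gate) setReady() {
	g.mu.Lock()
	defer g.mu.Unlock()
	select {
	case <-g.ch: // already closed
	default:
		close(g.ch) // release all current waiters
	}
}

func (g *gate) setPending() {
	g.mu.Lock()
	defer g.mu.Unlock()
	select {
	case <-g.ch: // was closed; recreate so new waiters block again
		g.ch = make(chan struct{})
	default:
	}
}

func main() {
	g := newGate()
	go g.setReady()
	<-g.done() // unblocks once setReady closes the channel
	fmt.Println("ready")
	g.setPending() // future waiters block until the next setReady
}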
135
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/store.go
generated
vendored
@ -1,135 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"fmt"

	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apiserver/pkg/features"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/tools/cache"
)

const (
	// btreeDegree defines the degree of btree storage.
	// Decided based on the benchmark results (below).
	// Selected the lowest degree from three options with best runtime (16,32,128).
	// │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │
	// │ sec/op │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │ sec/op vs base │
	// StoreCreateList/RV=NotOlderThan-24 473.0µ ± 11% 430.1µ ± 9% -9.08% (p=0.005 n=10) 427.9µ ± 6% -9.54% (p=0.002 n=10) 403.9µ ± 8% -14.62% (p=0.000 n=10) 401.0µ ± 4% -15.22% (p=0.000 n=10) 408.0µ ± 4% -13.75% (p=0.000 n=10) 385.9µ ± 4% -18.42% (p=0.000 n=10)
	// StoreCreateList/RV=ExactMatch-24 604.7µ ± 4% 596.7µ ± 8% ~ (p=0.529 n=10) 604.6µ ± 4% ~ (p=0.971 n=10) 601.1µ ± 4% ~ (p=0.853 n=10) 611.0µ ± 6% ~ (p=0.105 n=10) 598.2µ ± 5% ~ (p=0.579 n=10) 608.2µ ± 3% ~ (p=0.796 n=10)
	// StoreList/List=All/Paginate=False/RV=Empty-24 729.1µ ± 5% 692.9µ ± 3% -4.96% (p=0.002 n=10) 693.7µ ± 3% -4.86% (p=0.000 n=10) 688.3µ ± 1% -5.59% (p=0.000 n=10) 690.4µ ± 5% -5.31% (p=0.002 n=10) 689.7µ ± 2% -5.40% (p=0.000 n=10) 687.8µ ± 3% -5.67% (p=0.000 n=10)
	// StoreList/List=All/Paginate=True/RV=Empty-24 19.51m ± 2% 19.84m ± 2% ~ (p=0.105 n=10) 19.89m ± 3% ~ (p=0.190 n=10) 19.64m ± 4% ~ (p=0.853 n=10) 19.34m ± 4% ~ (p=0.481 n=10) 20.22m ± 4% +3.66% (p=0.007 n=10) 19.58m ± 4% ~ (p=0.912 n=10)
	// StoreList/List=Namespace/Paginate=False/RV=Empty-24 1.672m ± 4% 1.635m ± 2% ~ (p=0.247 n=10) 1.673m ± 5% ~ (p=0.631 n=10) 1.657m ± 2% ~ (p=0.971 n=10) 1.656m ± 4% ~ (p=0.739 n=10) 1.678m ± 2% ~ (p=0.631 n=10) 1.718m ± 8% ~ (p=0.105 n=10)
	// geomean 1.467m 1.420m -3.24% 1.430m -2.58% 1.403m -4.38% 1.402m -4.46% 1.417m -3.44% 1.403m -4.41%
	//
	// │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │
	// │ B/op │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │ B/op vs base │
	// StoreCreateList/RV=NotOlderThan-24 98.58Ki ± 11% 101.33Ki ± 13% ~ (p=0.280 n=10) 99.80Ki ± 26% ~ (p=0.353 n=10) 109.63Ki ± 9% ~ (p=0.075 n=10) 112.56Ki ± 6% +14.18% (p=0.007 n=10) 114.41Ki ± 10% +16.05% (p=0.003 n=10) 115.06Ki ± 12% +16.72% (p=0.011 n=10)
	// StoreCreateList/RV=ExactMatch-24 117.1Ki ± 0% 117.5Ki ± 0% ~ (p=0.218 n=10) 116.9Ki ± 0% ~ (p=0.052 n=10) 117.3Ki ± 0% ~ (p=0.353 n=10) 116.9Ki ± 0% ~ (p=0.075 n=10) 117.0Ki ± 0% ~ (p=0.436 n=10) 117.0Ki ± 0% ~ (p=0.280 n=10)
	// StoreList/List=All/Paginate=False/RV=Empty-24 6.023Mi ± 0% 6.024Mi ± 0% +0.01% (p=0.037 n=10) 6.024Mi ± 0% ~ (p=0.493 n=10) 6.024Mi ± 0% +0.01% (p=0.035 n=10) 6.024Mi ± 0% ~ (p=0.247 n=10) 6.024Mi ± 0% ~ (p=0.247 n=10) 6.024Mi ± 0% ~ (p=0.315 n=10)
	// StoreList/List=All/Paginate=True/RV=Empty-24 64.22Mi ± 0% 64.21Mi ± 0% ~ (p=0.075 n=10) 64.23Mi ± 0% ~ (p=0.280 n=10) 64.21Mi ± 0% -0.02% (p=0.002 n=10) 64.22Mi ± 0% ~ (p=0.579 n=10) 64.22Mi ± 0% ~ (p=0.971 n=10) 64.22Mi ± 0% ~ (p=1.000 n=10)
	// StoreList/List=Namespace/Paginate=False/RV=Empty-24 8.177Mi ± 0% 8.178Mi ± 0% ~ (p=0.579 n=10) 8.177Mi ± 0% ~ (p=0.971 n=10) 8.179Mi ± 0% ~ (p=0.579 n=10) 8.178Mi ± 0% ~ (p=0.739 n=10) 8.179Mi ± 0% ~ (p=0.315 n=10) 8.176Mi ± 0% ~ (p=0.247 n=10)
	// geomean 2.034Mi 2.047Mi +0.61% 2.039Mi +0.22% 2.079Mi +2.19% 2.088Mi +2.66% 2.095Mi +3.01% 2.098Mi +3.12%
	//
	// │ 2 │ 4 │ 8 │ 16 │ 32 │ 64 │ 128 │
	// │ allocs/op │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │ allocs/op vs base │
	// StoreCreateList/RV=NotOlderThan-24 560.0 ± 0% 558.0 ± 0% -0.36% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 558.0 ± 0% -0.36% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10) 557.0 ± 0% -0.54% (p=0.000 n=10)
	// StoreCreateList/RV=ExactMatch-24 871.0 ± 0% 870.0 ± 0% -0.11% (p=0.038 n=10) 870.0 ± 0% -0.11% (p=0.004 n=10) 870.0 ± 0% -0.11% (p=0.005 n=10) 869.0 ± 0% -0.23% (p=0.000 n=10) 870.0 ± 0% -0.11% (p=0.001 n=10) 870.0 ± 0% -0.11% (p=0.000 n=10)
	// StoreList/List=All/Paginate=False/RV=Empty-24 351.0 ± 3% 358.0 ± 1% +1.99% (p=0.034 n=10) 352.5 ± 3% ~ (p=0.589 n=10) 358.5 ± 1% +2.14% (p=0.022 n=10) 356.5 ± 3% ~ (p=0.208 n=10) 355.0 ± 3% ~ (p=0.224 n=10) 355.0 ± 3% ~ (p=0.183 n=10)
	// StoreList/List=All/Paginate=True/RV=Empty-24 494.4k ± 0% 494.4k ± 0% ~ (p=0.424 n=10) 494.6k ± 0% +0.06% (p=0.000 n=10) 492.7k ± 0% -0.34% (p=0.000 n=10) 494.5k ± 0% +0.02% (p=0.009 n=10) 493.0k ± 0% -0.28% (p=0.000 n=10) 494.4k ± 0% ~ (p=0.424 n=10)
	// StoreList/List=Namespace/Paginate=False/RV=Empty-24 32.43k ± 0% 32.44k ± 0% ~ (p=0.579 n=10) 32.43k ± 0% ~ (p=0.971 n=10) 32.45k ± 0% ~ (p=0.517 n=10) 32.44k ± 0% ~ (p=0.670 n=10) 32.46k ± 0% ~ (p=0.256 n=10) 32.41k ± 0% ~ (p=0.247 n=10)
	// geomean 4.872k 4.887k +0.31% 4.870k -0.03% 4.885k +0.28% 4.880k +0.17% 4.875k +0.06% 4.876k +0.08%
	btreeDegree = 16
)

type storeIndexer interface {
	Add(obj interface{}) error
	Update(obj interface{}) error
	Delete(obj interface{}) error
	List() []interface{}
	ListKeys() []string
	Get(obj interface{}) (item interface{}, exists bool, err error)
	GetByKey(key string) (item interface{}, exists bool, err error)
	Replace([]interface{}, string) error
	ByIndex(indexName, indexedValue string) ([]interface{}, error)
}

type orderedLister interface {
	ListPrefix(prefix, continueKey string) []interface{}
	Count(prefix, continueKey string) (count int)
	Clone() orderedLister
}

func newStoreIndexer(indexers *cache.Indexers) storeIndexer {
	if utilfeature.DefaultFeatureGate.Enabled(features.BtreeWatchCache) {
		return newThreadedBtreeStoreIndexer(storeElementIndexers(indexers), btreeDegree)
	}
	return cache.NewIndexer(storeElementKey, storeElementIndexers(indexers))
}

// Computing a key of an object is generally non-trivial (it performs
// e.g. validation underneath). Similarly computing object fields and
// labels. To avoid computing them multiple times (to serve the event
// in different List/Watch requests), in the underlying store we are
// keeping structs (key, object, labels, fields).
type storeElement struct {
	Key    string
	Object runtime.Object
	Labels labels.Set
	Fields fields.Set
}

func storeElementKey(obj interface{}) (string, error) {
	elem, ok := obj.(*storeElement)
	if !ok {
		return "", fmt.Errorf("not a storeElement: %v", obj)
	}
	return elem.Key, nil
}

func storeElementObject(obj interface{}) (runtime.Object, error) {
	elem, ok := obj.(*storeElement)
	if !ok {
		return nil, fmt.Errorf("not a storeElement: %v", obj)
	}
	return elem.Object, nil
}

func storeElementIndexFunc(objIndexFunc cache.IndexFunc) cache.IndexFunc {
	return func(obj interface{}) (strings []string, e error) {
		seo, err := storeElementObject(obj)
		if err != nil {
			return nil, err
		}
		return objIndexFunc(seo)
	}
}

func storeElementIndexers(indexers *cache.Indexers) cache.Indexers {
	if indexers == nil {
		return cache.Indexers{}
	}
	ret := cache.Indexers{}
	for indexName, indexFunc := range *indexers {
		ret[indexName] = storeElementIndexFunc(indexFunc)
	}
	return ret
}
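For example (a sketch; the index name and helper are hypothetical, and meta is k8s.io/apimachinery/pkg/api/meta), a plain client-go IndexFunc written against the original object can be reused unchanged, because storeElementIndexFunc unwraps the storeElement before calling it:

// Sketch: registering a namespace index with the store built above.
func exampleNamespaceIndex() storeIndexer {
	indexers := &cache.Indexers{
		"byNamespace": func(obj interface{}) ([]string, error) {
			m, err := meta.Accessor(obj) // sees the original object, not the storeElement wrapper
			if err != nil {
				return nil, err
			}
			return []string{m.GetNamespace()}, nil
		},
	}
	return newStoreIndexer(indexers) // btree or map variant, gated on BtreeWatchCache
}

Lookups then go through ByIndex("byNamespace", "default") on the returned storeIndexer.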
505
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/store_btree.go
generated
vendored
@ -1,505 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"fmt"
	"strings"
	"sync"

	"github.com/google/btree"
	"k8s.io/client-go/tools/cache"
)

// newThreadedBtreeStoreIndexer returns a storage for cacher by adding locking over the two data structures:
// * btree based storage for efficient LIST operation on prefix
// * map based indexer for retrieving values by index.
// This separation is used to allow independent snapshotting of those two storages in the future.
// The intention is to utilize btree for its cheap snapshots that don't require locking as long as the data is not mutated.
func newThreadedBtreeStoreIndexer(indexers cache.Indexers, degree int) *threadedStoreIndexer {
	return &threadedStoreIndexer{
		store:   newBtreeStore(degree),
		indexer: newIndexer(indexers),
	}
}

type threadedStoreIndexer struct {
	lock    sync.RWMutex
	store   btreeStore
	indexer indexer
}

var _ orderedLister = (*threadedStoreIndexer)(nil)

func (si *threadedStoreIndexer) Count(prefix, continueKey string) (count int) {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.Count(prefix, continueKey)
}

func (si *threadedStoreIndexer) Clone() orderedLister {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.Clone()
}

func (si *threadedStoreIndexer) Add(obj interface{}) error {
	return si.addOrUpdate(obj)
}

func (si *threadedStoreIndexer) Update(obj interface{}) error {
	return si.addOrUpdate(obj)
}

func (si *threadedStoreIndexer) addOrUpdate(obj interface{}) error {
	if obj == nil {
		return fmt.Errorf("obj cannot be nil")
	}
	newElem, ok := obj.(*storeElement)
	if !ok {
		return fmt.Errorf("obj not a storeElement: %#v", obj)
	}
	si.lock.Lock()
	defer si.lock.Unlock()
	oldElem := si.store.addOrUpdateElem(newElem)
	return si.indexer.updateElem(newElem.Key, oldElem, newElem)
}

func (si *threadedStoreIndexer) Delete(obj interface{}) error {
	storeElem, ok := obj.(*storeElement)
	if !ok {
		return fmt.Errorf("obj not a storeElement: %#v", obj)
	}
	si.lock.Lock()
	defer si.lock.Unlock()
	oldObj, existed := si.store.deleteElem(storeElem)
	if !existed {
		return nil
	}
	return si.indexer.updateElem(storeElem.Key, oldObj, nil)
}

func (si *threadedStoreIndexer) List() []interface{} {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.List()
}

func (si *threadedStoreIndexer) ListPrefix(prefix, continueKey string) []interface{} {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.ListPrefix(prefix, continueKey)
}

func (si *threadedStoreIndexer) ListKeys() []string {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.ListKeys()
}

func (si *threadedStoreIndexer) Get(obj interface{}) (item interface{}, exists bool, err error) {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.Get(obj)
}

func (si *threadedStoreIndexer) GetByKey(key string) (item interface{}, exists bool, err error) {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.GetByKey(key)
}

func (si *threadedStoreIndexer) Replace(objs []interface{}, resourceVersion string) error {
	si.lock.Lock()
	defer si.lock.Unlock()
	err := si.store.Replace(objs, resourceVersion)
	if err != nil {
		return err
	}
	return si.indexer.Replace(objs, resourceVersion)
}

func (si *threadedStoreIndexer) ByIndex(indexName, indexValue string) ([]interface{}, error) {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.indexer.ByIndex(indexName, indexValue)
}

func newBtreeStore(degree int) btreeStore {
	return btreeStore{
		tree: btree.NewG(degree, func(a, b *storeElement) bool {
			return a.Key < b.Key
		}),
	}
}

type btreeStore struct {
	tree *btree.BTreeG[*storeElement]
}

func (s *btreeStore) Clone() orderedLister {
	return &btreeStore{
		tree: s.tree.Clone(),
	}
}

func (s *btreeStore) Add(obj interface{}) error {
	if obj == nil {
		return fmt.Errorf("obj cannot be nil")
	}
	storeElem, ok := obj.(*storeElement)
	if !ok {
		return fmt.Errorf("obj not a storeElement: %#v", obj)
	}
	s.addOrUpdateElem(storeElem)
	return nil
}

func (s *btreeStore) Update(obj interface{}) error {
	if obj == nil {
		return fmt.Errorf("obj cannot be nil")
	}
	storeElem, ok := obj.(*storeElement)
	if !ok {
		return fmt.Errorf("obj not a storeElement: %#v", obj)
	}
	s.addOrUpdateElem(storeElem)
	return nil
}

func (s *btreeStore) Delete(obj interface{}) error {
	if obj == nil {
		return fmt.Errorf("obj cannot be nil")
	}
	storeElem, ok := obj.(*storeElement)
	if !ok {
		return fmt.Errorf("obj not a storeElement: %#v", obj)
	}
	s.deleteElem(storeElem)
	return nil
}

func (s *btreeStore) deleteElem(storeElem *storeElement) (*storeElement, bool) {
	return s.tree.Delete(storeElem)
}

func (s *btreeStore) List() []interface{} {
	items := make([]interface{}, 0, s.tree.Len())
	s.tree.Ascend(func(item *storeElement) bool {
		items = append(items, item)
		return true
	})
	return items
}

func (s *btreeStore) ListKeys() []string {
	items := make([]string, 0, s.tree.Len())
	s.tree.Ascend(func(item *storeElement) bool {
		items = append(items, item.Key)
		return true
	})
	return items
}

func (s *btreeStore) Get(obj interface{}) (item interface{}, exists bool, err error) {
	storeElem, ok := obj.(*storeElement)
	if !ok {
		return nil, false, fmt.Errorf("obj is not a storeElement")
	}
	item, exists = s.tree.Get(storeElem)
	return item, exists, nil
}

func (s *btreeStore) GetByKey(key string) (item interface{}, exists bool, err error) {
	return s.getByKey(key)
}

func (s *btreeStore) Replace(objs []interface{}, _ string) error {
	s.tree.Clear(false)
	for _, obj := range objs {
		storeElem, ok := obj.(*storeElement)
		if !ok {
			return fmt.Errorf("obj not a storeElement: %#v", obj)
		}
		s.addOrUpdateElem(storeElem)
	}
	return nil
}

// addOrUpdateElem assumes a lock is held and is used for Add
// and Update operations.
func (s *btreeStore) addOrUpdateElem(storeElem *storeElement) *storeElement {
	oldObj, _ := s.tree.ReplaceOrInsert(storeElem)
	return oldObj
}

func (s *btreeStore) getByKey(key string) (item interface{}, exists bool, err error) {
	keyElement := &storeElement{Key: key}
	item, exists = s.tree.Get(keyElement)
	return item, exists, nil
}

func (s *btreeStore) ListPrefix(prefix, continueKey string) []interface{} {
	if continueKey == "" {
		continueKey = prefix
	}
	var result []interface{}
	s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(item *storeElement) bool {
		if !strings.HasPrefix(item.Key, prefix) {
			return false
		}
		result = append(result, item)
		return true
	})
	return result
}

func (s *btreeStore) Count(prefix, continueKey string) (count int) {
	if continueKey == "" {
		continueKey = prefix
	}
	s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(item *storeElement) bool {
		if !strings.HasPrefix(item.Key, prefix) {
			return false
		}
		count++
		return true
	})
	return count
}

// newIndexer returns an indexer similar to storeIndex from client-go/tools/cache.
// TODO: Unify the indexer code with client-go/cache package.
// The major difference is the type of values stored and their mutability:
// * Indexer in client-go stores object keys, which are not mutable.
// * Indexer in cacher stores whole objects, which are mutable.
// Indexer in client-go uses keys as it is used in conjunction with map[key]value,
// allowing for fast value retrieval, while the btree used in cacher would add overhead.
// The difference in mutability of stored values is used for optimizing some operations in client-go Indexer.
func newIndexer(indexers cache.Indexers) indexer {
	return indexer{
		indices:  map[string]map[string]map[string]*storeElement{},
		indexers: indexers,
	}
}

type indexer struct {
	indices  map[string]map[string]map[string]*storeElement
	indexers cache.Indexers
}

func (i *indexer) ByIndex(indexName, indexValue string) ([]interface{}, error) {
	indexFunc := i.indexers[indexName]
	if indexFunc == nil {
		return nil, fmt.Errorf("index with name %s does not exist", indexName)
	}
	index := i.indices[indexName]
	set := index[indexValue]
	list := make([]interface{}, 0, len(set))
	for _, obj := range set {
		list = append(list, obj)
	}
	return list, nil
}

func (i *indexer) Replace(objs []interface{}, resourceVersion string) error {
	i.indices = map[string]map[string]map[string]*storeElement{}
	for _, obj := range objs {
		storeElem, ok := obj.(*storeElement)
		if !ok {
			return fmt.Errorf("obj not a storeElement: %#v", obj)
		}
		err := i.updateElem(storeElem.Key, nil, storeElem)
		if err != nil {
			return err
		}
	}
	return nil
}

func (i *indexer) updateElem(key string, oldObj, newObj *storeElement) (err error) {
	var oldIndexValues, indexValues []string
	for name, indexFunc := range i.indexers {
		if oldObj != nil {
			oldIndexValues, err = indexFunc(oldObj)
		} else {
			oldIndexValues = oldIndexValues[:0]
		}
		if err != nil {
			return fmt.Errorf("unable to calculate an index entry for key %q on index %q: %w", key, name, err)
		}
		if newObj != nil {
			indexValues, err = indexFunc(newObj)
		} else {
			indexValues = indexValues[:0]
		}
		if err != nil {
			return fmt.Errorf("unable to calculate an index entry for key %q on index %q: %w", key, name, err)
		}
		index := i.indices[name]
		if index == nil {
			index = map[string]map[string]*storeElement{}
			i.indices[name] = index
		}
		if len(indexValues) == 1 && len(oldIndexValues) == 1 && indexValues[0] == oldIndexValues[0] {
			// We optimize for the most common case where indexFunc returns a single value which has not been changed
			i.add(key, indexValues[0], newObj, index)
			continue
		}
		for _, value := range oldIndexValues {
			i.delete(key, value, index)
		}
		for _, value := range indexValues {
			i.add(key, value, newObj, index)
		}
	}
	return nil
}

func (i *indexer) add(key, value string, obj *storeElement, index map[string]map[string]*storeElement) {
	set := index[value]
	if set == nil {
		set = map[string]*storeElement{}
		index[value] = set
	}
	set[key] = obj
}

func (i *indexer) delete(key, value string, index map[string]map[string]*storeElement) {
	set := index[value]
	if set == nil {
		return
	}
	delete(set, key)
	// If we don't delete the set when it becomes empty, indices with high cardinality
	// short lived resources can cause memory to increase over time from
	// unused empty sets. See `kubernetes/kubernetes/issues/84959`.
	if len(set) == 0 {
		delete(index, value)
	}
}

// newStoreSnapshotter returns a storeSnapshotter that stores snapshots for
// serving read requests with exact resource versions (RV) and pagination.
//
// Snapshots are created by calling the Clone method on orderedLister, which is
// expected to be fast and efficient thanks to usage of B-trees.
// B-trees can create a lazy copy of the tree structure, minimizing overhead.
//
// Assuming the watch cache observes all events and snapshots cache after each of them,
// requests for a specific resource version can be served by retrieving
// the snapshot with the greatest RV less than or equal to the requested RV.
// To make snapshot retrieval efficient we need an ordered data structure, such as a tree.
//
// The initial implementation uses a B-tree to achieve the following performance characteristics (n - number of snapshots stored):
//   - `Add`: Adds a new snapshot.
//     Complexity: O(log n).
//     Executed for each watch event observed by the cache.
//   - `GetLessOrEqual`: Retrieves the snapshot with the greatest RV less than or equal to the requested RV.
//     Complexity: O(log n).
//     Executed for each LIST request with match=Exact or continuation.
//   - `RemoveLess`: Cleans up snapshots outside the watch history window.
//     Complexity: O(k log n), k - number of snapshots to remove, usually only one if watch capacity was not reduced.
//     Executed per watch event observed when the cache is full.
//   - `Reset`: Cleans up all snapshots.
//     Complexity: O(1).
//     Executed when the watch cache is reinitialized.
//
// Further optimization is possible by leveraging the property that adds always
// increase the maximum RV and deletes only increase the minimum RV.
// For example, a binary search on a cyclic buffer of (RV, snapshot)
// should reduce number of allocations and improve removal complexity.
// However, this solution is more complex and is deferred for future implementation.
//
// TODO: Rewrite to use a cyclic buffer
func newStoreSnapshotter() *storeSnapshotter {
	s := &storeSnapshotter{
		snapshots: btree.NewG[rvSnapshot](btreeDegree, func(a, b rvSnapshot) bool {
			return a.resourceVersion < b.resourceVersion
		}),
	}
	return s
}

var _ Snapshotter = (*storeSnapshotter)(nil)

type Snapshotter interface {
	Reset()
	GetLessOrEqual(rv uint64) (orderedLister, bool)
	Add(rv uint64, indexer orderedLister)
	RemoveLess(rv uint64)
	Len() int
}

type storeSnapshotter struct {
	mux       sync.RWMutex
	snapshots *btree.BTreeG[rvSnapshot]
}

type rvSnapshot struct {
	resourceVersion uint64
	snapshot        orderedLister
}

func (s *storeSnapshotter) Reset() {
	s.mux.Lock()
	defer s.mux.Unlock()
	s.snapshots.Clear(false)
}

func (s *storeSnapshotter) GetLessOrEqual(rv uint64) (orderedLister, bool) {
	s.mux.RLock()
	defer s.mux.RUnlock()

	var result *rvSnapshot
	s.snapshots.DescendLessOrEqual(rvSnapshot{resourceVersion: rv}, func(rvs rvSnapshot) bool {
		result = &rvs
		return false
	})
	if result == nil {
		return nil, false
	}
	return result.snapshot, true
}

func (s *storeSnapshotter) Add(rv uint64, indexer orderedLister) {
	s.mux.Lock()
	defer s.mux.Unlock()
	s.snapshots.ReplaceOrInsert(rvSnapshot{resourceVersion: rv, snapshot: indexer.Clone()})
}

func (s *storeSnapshotter) RemoveLess(rv uint64) {
	s.mux.Lock()
	defer s.mux.Unlock()
	for s.snapshots.Len() > 0 {
		oldest, ok := s.snapshots.Min()
		if !ok {
			break
		}
		if rv <= oldest.resourceVersion {
			break
		}
		s.snapshots.DeleteMin()
	}
}

func (s *storeSnapshotter) Len() int {
	s.mux.RLock()
	defer s.mux.RUnlock()

	return s.snapshots.Len()
}
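A self-contained sketch of the two github.com/google/btree properties the snapshotter relies on: Clone is a cheap lazy copy unaffected by later writes, and DescendLessOrEqual finds the greatest revision at or below a request. The rev type is illustrative only.

package main

import (
	"fmt"

	"github.com/google/btree"
)

type rev struct{ rv uint64 }

func main() {
	// Ordered by resource version, mirroring storeSnapshotter's less function.
	t := btree.NewG(16, func(a, b rev) bool { return a.rv < b.rv })
	for _, rv := range []uint64{100, 105, 110} {
		t.ReplaceOrInsert(rev{rv})
	}

	// Greatest snapshot with rv <= 107, i.e. how GetLessOrEqual(107) resolves.
	var found *rev
	t.DescendLessOrEqual(rev{107}, func(r rev) bool {
		found = &r
		return false // stop at the first (greatest) hit
	})
	fmt.Println(found.rv) // 105

	// Clone is a lazy copy-on-write snapshot: later inserts don't affect it.
	snap := t.Clone()
	t.ReplaceOrInsert(rev{120})
	fmt.Println(snap.Len(), t.Len()) // 3 4
}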
102
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/time_budget.go
generated
vendored
@ -1,102 +0,0 @@
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cacher
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/utils/clock"
|
||||
)
|
||||
|
||||
const (
|
||||
refreshPerSecond = 50 * time.Millisecond
|
||||
maxBudget = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
// timeBudget implements a budget of time that you can use and is
|
||||
// periodically being refreshed. The pattern to use it is:
|
||||
//
|
||||
// budget := newTimeBudget(...)
|
||||
// ...
|
||||
// timeout := budget.takeAvailable()
|
||||
// // Now you can spend at most timeout on doing stuff
|
||||
// ...
|
||||
// // If you didn't use all timeout, return what you didn't use
|
||||
// budget.returnUnused(<unused part of timeout>)
|
||||
//
|
||||
// NOTE: It's not recommended to be used concurrently from multiple threads -
|
||||
// if first user takes the whole timeout, the second one will get 0 timeout
|
||||
// even though the first one may return something later.
|
||||
type timeBudget interface {
|
||||
takeAvailable() time.Duration
|
||||
returnUnused(unused time.Duration)
|
||||
}
|
||||
|
||||
type timeBudgetImpl struct {
|
||||
sync.Mutex
|
||||
clock clock.Clock
|
||||
budget time.Duration
|
||||
maxBudget time.Duration
|
||||
refresh time.Duration
|
||||
// last store last access time
|
||||
last time.Time
|
||||
}
|
||||
|
||||
func newTimeBudget() timeBudget {
|
||||
result := &timeBudgetImpl{
|
||||
clock: clock.RealClock{},
|
||||
budget: time.Duration(0),
|
||||
refresh: refreshPerSecond,
|
||||
maxBudget: maxBudget,
|
||||
}
|
||||
result.last = result.clock.Now()
|
||||
return result
|
||||
}
|
||||
|
||||
func (t *timeBudgetImpl) takeAvailable() time.Duration {
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
// budget accumulated since last access
|
||||
now := t.clock.Now()
|
||||
acc := now.Sub(t.last).Seconds() * t.refresh.Seconds()
|
||||
if acc < 0 {
|
||||
acc = 0
|
||||
}
|
||||
// update current budget and store the current time
|
||||
if t.budget = t.budget + time.Duration(acc*1e9); t.budget > t.maxBudget {
|
||||
t.budget = t.maxBudget
|
||||
}
|
||||
t.last = now
|
||||
result := t.budget
|
||||
t.budget = time.Duration(0)
|
||||
return result
|
||||
}
|
||||
|
||||
func (t *timeBudgetImpl) returnUnused(unused time.Duration) {
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
if unused < 0 {
|
||||
// We used more than allowed.
|
||||
return
|
||||
}
|
||||
// add the unused time directly to the budget
|
||||
// takeAvailable() will take into account the elapsed time
|
||||
if t.budget = t.budget + unused; t.budget > t.maxBudget {
|
||||
t.budget = t.maxBudget
|
||||
}
|
||||
}
|
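// Illustrative sketch (not part of the vendored file): the budget accrues
// 50ms per second of wall time and is capped at 100ms, shown here with a
// fake clock from k8s.io/utils/clock/testing.
//
//	fakeClock := clocktesting.NewFakeClock(time.Now())
//	b := &timeBudgetImpl{clock: fakeClock, refresh: refreshPerSecond, maxBudget: maxBudget}
//	b.last = fakeClock.Now()
//
//	fakeClock.Step(time.Second)
//	fmt.Println(b.takeAvailable()) // 50ms accrued after 1s idle
//
//	fakeClock.Step(10 * time.Second)
//	fmt.Println(b.takeAvailable()) // 500ms accrued, but capped at 100ms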
56
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/util.go
generated
vendored
@ -1,56 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"math"
	"strings"
	"time"
)

// hasPathPrefix returns true if the string matches pathPrefix exactly, or if it is prefixed with pathPrefix at a path segment boundary
func hasPathPrefix(s, pathPrefix string) bool {
	// Short circuit if s doesn't contain the prefix at all
	if !strings.HasPrefix(s, pathPrefix) {
		return false
	}

	pathPrefixLength := len(pathPrefix)

	if len(s) == pathPrefixLength {
		// Exact match
		return true
	}
	if strings.HasSuffix(pathPrefix, "/") {
		// pathPrefix already ensured a path segment boundary
		return true
	}
	if s[pathPrefixLength:pathPrefixLength+1] == "/" {
		// The next character in s is a path segment boundary
		// Check this instead of normalizing pathPrefix to avoid allocating on every call
		return true
	}
	return false
}

// calculateRetryAfterForUnreadyCache calculates the retry duration based on the cache downtime.
func calculateRetryAfterForUnreadyCache(downtime time.Duration) int {
	factor := 0.06
	result := math.Exp(factor * downtime.Seconds())
	result = math.Min(30, math.Max(1, result))
	return int(result)
}
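// Illustrative sketch (not part of the vendored file): expected results of
// the two helpers above.
//
//	hasPathPrefix("/registry/pods", "/registry")  // true: next byte is "/"
//	hasPathPrefix("/registry/pods", "/registry/") // true: prefix ends at a boundary
//	hasPathPrefix("/registrydata", "/registry")   // false: "d" is not a segment boundary
//
//	calculateRetryAfterForUnreadyCache(10 * time.Second) // 1  (exp(0.6) ~= 1.8, floored)
//	calculateRetryAfterForUnreadyCache(60 * time.Second) // 30 (exp(3.6) ~= 36.6, clamped)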
860
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go
generated
vendored
@ -1,860 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"context"
	"fmt"
	"math"
	"sort"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/apiserver/pkg/features"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/cacher/delegator"
	"k8s.io/apiserver/pkg/storage/cacher/metrics"
	"k8s.io/apiserver/pkg/storage/cacher/progress"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/tools/cache"
	"k8s.io/component-base/tracing"
	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

const (
	// blockTimeout determines how long we're willing to block the request
	// to wait for a given resource version to be propagated to cache,
	// before terminating request and returning Timeout error with retry
	// after suggestion.
	blockTimeout = 3 * time.Second

	// resourceVersionTooHighRetrySeconds is the seconds before an operation should be retried by the client
	// after receiving a 'too high resource version' error.
	resourceVersionTooHighRetrySeconds = 1

	// defaultLowerBoundCapacity is a default value for event cache capacity's lower bound.
	// TODO: Figure out to what value we can decrease it.
	defaultLowerBoundCapacity = 100

	// defaultUpperBoundCapacity should be able to keep the required history.
	defaultUpperBoundCapacity = 100 * 1024
)

// watchCacheEvent is a single "watch event" that is sent to users of
// watchCache. In addition to a typical "watch.Event" it contains
// the previous value of the object to enable proper filtering in the
// upper layers.
type watchCacheEvent struct {
	Type            watch.EventType
	Object          runtime.Object
	ObjLabels       labels.Set
	ObjFields       fields.Set
	PrevObject      runtime.Object
	PrevObjLabels   labels.Set
	PrevObjFields   fields.Set
	Key             string
	ResourceVersion uint64
	RecordTime      time.Time
}

// watchCache implements a Store interface.
// However, it depends on the elements implementing runtime.Object interface.
//
// watchCache is a "sliding window" (with a limited capacity) of objects
// observed from a watch.
type watchCache struct {
	sync.RWMutex

	// Condition on which lists are waiting for the fresh enough
	// resource version.
	cond *sync.Cond

	// Maximum size of history window.
	capacity int

	// upper bound of capacity since event cache has a dynamic size.
	upperBoundCapacity int

	// lower bound of capacity since event cache has a dynamic size.
	lowerBoundCapacity int

	// keyFunc is used to get a key in the underlying storage for a given object.
	keyFunc func(runtime.Object) (string, error)

	// getAttrsFunc is used to get labels and fields of an object.
	getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error)

	// cache is used as a cyclic buffer - the "current" contents of it are
	// stored in [start_index%capacity, end_index%capacity) - so the
	// "current" contents have exactly end_index-start_index items.
	cache      []*watchCacheEvent
	startIndex int
	endIndex   int
	// removedEventSinceRelist holds the information whether any of the events
	// were already removed from the `cache` cyclic buffer since the last relist
	removedEventSinceRelist bool

	// store will effectively support LIST operation from the "end of cache
	// history" i.e. from the moment just after the newest cached watched event.
	// It is necessary to effectively allow clients to start watching from now.
	// NOTE: We assume that <store> is thread-safe.
	store storeIndexer

	// ResourceVersion up to which the watchCache is propagated.
	resourceVersion uint64

	// ResourceVersion of the last list result (populated via Replace() method).
	listResourceVersion uint64

	// This handler is run at the end of every successful Replace() method.
	onReplace func()

	// This handler is run at the end of every Add/Update/Delete method
	// and additionally gets the previous value of the object.
	eventHandler func(*watchCacheEvent)

	// for testing timeouts.
	clock clock.Clock

	// eventFreshDuration defines the minimum watch history watchcache will store.
	eventFreshDuration time.Duration

	// An underlying storage.Versioner.
	versioner storage.Versioner

	// cacher's group resource
	groupResource schema.GroupResource

	// For testing cache interval invalidation.
	indexValidator indexValidator

	// Requests progress notification if there are requests waiting for watch
	// to be fresh
	waitingUntilFresh *progress.ConditionalProgressRequester

	// Stores previous snapshots of orderedLister to allow serving requests from previous revisions.
	snapshots Snapshotter
}

func newWatchCache(
	keyFunc func(runtime.Object) (string, error),
	eventHandler func(*watchCacheEvent),
	getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error),
	versioner storage.Versioner,
	indexers *cache.Indexers,
	clock clock.WithTicker,
	eventFreshDuration time.Duration,
	groupResource schema.GroupResource,
	progressRequester *progress.ConditionalProgressRequester) *watchCache {
	wc := &watchCache{
		capacity:            defaultLowerBoundCapacity,
		keyFunc:             keyFunc,
		getAttrsFunc:        getAttrsFunc,
		cache:               make([]*watchCacheEvent, defaultLowerBoundCapacity),
		lowerBoundCapacity:  defaultLowerBoundCapacity,
		upperBoundCapacity:  capacityUpperBound(eventFreshDuration),
		startIndex:          0,
		endIndex:            0,
		store:               newStoreIndexer(indexers),
		resourceVersion:     0,
		listResourceVersion: 0,
		eventHandler:        eventHandler,
		clock:               clock,
		eventFreshDuration:  eventFreshDuration,
		versioner:           versioner,
		groupResource:       groupResource,
		waitingUntilFresh:   progressRequester,
	}
	if utilfeature.DefaultFeatureGate.Enabled(features.ListFromCacheSnapshot) {
		wc.snapshots = newStoreSnapshotter()
	}
	metrics.WatchCacheCapacity.WithLabelValues(groupResource.String()).Set(float64(wc.capacity))
	wc.cond = sync.NewCond(wc.RLocker())
	wc.indexValidator = wc.isIndexValidLocked

	return wc
}

// capacityUpperBound denotes the maximum possible capacity of the watch cache
// to which it can resize.
func capacityUpperBound(eventFreshDuration time.Duration) int {
	if eventFreshDuration <= DefaultEventFreshDuration {
		return defaultUpperBoundCapacity
	}
	// eventFreshDuration determines how long the watch events are supposed
	// to be stored in the watch cache.
	// In very high churn situations, there is a need to store more events
	// in the watch cache, hence it would have to be upsized accordingly.
	// Because of that, for larger values of eventFreshDuration, we set the
	// upper bound of the watch cache's capacity proportionally to the ratio
	// between eventFreshDuration and DefaultEventFreshDuration.
	// Given that the watch cache size can only double, we round up that
	// proportion to the next power of two.
	exponent := int(math.Ceil((math.Log2(eventFreshDuration.Seconds() / DefaultEventFreshDuration.Seconds()))))
	if maxExponent := int(math.Floor((math.Log2(math.MaxInt32 / defaultUpperBoundCapacity)))); exponent > maxExponent {
		// Making sure that the capacity's upper bound fits in a 32-bit integer.
		exponent = maxExponent
		klog.Warningf("Capping watch cache capacity upper bound to %v", defaultUpperBoundCapacity<<exponent)
	}
	return defaultUpperBoundCapacity << exponent
}
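// Illustrative sketch (not part of the vendored file), assuming the upstream
// default of DefaultEventFreshDuration = 75s:
//
//	capacityUpperBound(75 * time.Second)  // 102400: at or below the default, unchanged
//	capacityUpperBound(300 * time.Second) // 409600: ceil(log2(300/75)) = 2, so 102400 << 2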
// Add takes runtime.Object as an argument.
func (w *watchCache) Add(obj interface{}) error {
	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
	if err != nil {
		return err
	}
	event := watch.Event{Type: watch.Added, Object: object}

	f := func(elem *storeElement) error { return w.store.Add(elem) }
	return w.processEvent(event, resourceVersion, f)
}

// Update takes runtime.Object as an argument.
func (w *watchCache) Update(obj interface{}) error {
	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
	if err != nil {
		return err
	}
	event := watch.Event{Type: watch.Modified, Object: object}

	f := func(elem *storeElement) error { return w.store.Update(elem) }
	return w.processEvent(event, resourceVersion, f)
}

// Delete takes runtime.Object as an argument.
func (w *watchCache) Delete(obj interface{}) error {
	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
	if err != nil {
		return err
	}
	event := watch.Event{Type: watch.Deleted, Object: object}

	f := func(elem *storeElement) error { return w.store.Delete(elem) }
	return w.processEvent(event, resourceVersion, f)
}

func (w *watchCache) objectToVersionedRuntimeObject(obj interface{}) (runtime.Object, uint64, error) {
	object, ok := obj.(runtime.Object)
	if !ok {
		return nil, 0, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj)
	}
	resourceVersion, err := w.versioner.ObjectResourceVersion(object)
	if err != nil {
		return nil, 0, err
	}
	return object, resourceVersion, nil
}

// processEvent is safe as long as there is at most one call to it in flight
// at any point in time.
func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, updateFunc func(*storeElement) error) error {
	metrics.EventsReceivedCounter.WithLabelValues(w.groupResource.String()).Inc()

	key, err := w.keyFunc(event.Object)
	if err != nil {
		return fmt.Errorf("couldn't compute key: %v", err)
	}
	elem := &storeElement{Key: key, Object: event.Object}
	elem.Labels, elem.Fields, err = w.getAttrsFunc(event.Object)
	if err != nil {
		return err
	}

	wcEvent := &watchCacheEvent{
		Type:            event.Type,
		Object:          elem.Object,
		ObjLabels:       elem.Labels,
		ObjFields:       elem.Fields,
		Key:             key,
		ResourceVersion: resourceVersion,
		RecordTime:      w.clock.Now(),
	}

	// We can call w.store.Get() outside of a critical section,
	// because the w.store itself is thread-safe and the only
	// place where w.store is modified is below (via updateFunc)
	// and these calls are serialized because reflector is processing
	// events one-by-one.
	previous, exists, err := w.store.Get(elem)
	if err != nil {
		return err
	}
	if exists {
		previousElem := previous.(*storeElement)
		wcEvent.PrevObject = previousElem.Object
		wcEvent.PrevObjLabels = previousElem.Labels
		wcEvent.PrevObjFields = previousElem.Fields
	}

	if err := func() error {
		w.Lock()
		defer w.Unlock()

		w.updateCache(wcEvent)
		w.resourceVersion = resourceVersion
		defer w.cond.Broadcast()

		err := updateFunc(elem)
		if err != nil {
			return err
		}
		if w.snapshots != nil {
			if orderedLister, ordered := w.store.(orderedLister); ordered {
				if w.isCacheFullLocked() {
					oldestRV := w.cache[w.startIndex%w.capacity].ResourceVersion
					w.snapshots.RemoveLess(oldestRV)
				}
				w.snapshots.Add(w.resourceVersion, orderedLister)
			}
		}
		return err
	}(); err != nil {
		return err
	}

	// Avoid calling event handler under lock.
	// This is safe as long as there is at most one call to Add/Update/Delete and
	// UpdateResourceVersion in flight at any point in time, which is true now,
	// because reflector calls them synchronously from its main thread.
	if w.eventHandler != nil {
		w.eventHandler(wcEvent)
	}
	metrics.RecordResourceVersion(w.groupResource.String(), resourceVersion)
	return nil
}

// Assumes that lock is already held for write.
func (w *watchCache) updateCache(event *watchCacheEvent) {
	w.resizeCacheLocked(event.RecordTime)
	if w.isCacheFullLocked() {
		// Cache is full - remove the oldest element.
		w.startIndex++
		w.removedEventSinceRelist = true
	}
	w.cache[w.endIndex%w.capacity] = event
	w.endIndex++
}

// resizeCacheLocked resizes the cache if necessary:
// - increases capacity by 2x if cache is full and all cached events occurred within last eventFreshDuration.
// - decreases capacity by 2x when the most recent quarter of events occurred outside of eventFreshDuration (protects watchCache from flapping).
func (w *watchCache) resizeCacheLocked(eventTime time.Time) {
	if w.isCacheFullLocked() && eventTime.Sub(w.cache[w.startIndex%w.capacity].RecordTime) < w.eventFreshDuration {
		capacity := min(w.capacity*2, w.upperBoundCapacity)
		if capacity > w.capacity {
			w.doCacheResizeLocked(capacity)
		}
		return
	}
	if w.isCacheFullLocked() && eventTime.Sub(w.cache[(w.endIndex-w.capacity/4)%w.capacity].RecordTime) > w.eventFreshDuration {
		capacity := max(w.capacity/2, w.lowerBoundCapacity)
		if capacity < w.capacity {
			w.doCacheResizeLocked(capacity)
		}
		return
	}
}
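// Illustrative sketch (not part of the vendored file): the cyclic-buffer
// arithmetic above. With capacity = 4, startIndex = 3, endIndex = 7 the
// buffer is full (7 == 3 + 4); the oldest event sits at slot 3%4 == 3, and
// updateCache evicts it by bumping startIndex before writing the new event
// to slot 7%4 == 3.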
// isCacheFullLocked is used to judge whether the event cache is full.
// Assumes that lock is already held for write.
func (w *watchCache) isCacheFullLocked() bool {
	return w.endIndex == w.startIndex+w.capacity
}

// doCacheResizeLocked resizes watchCache's event array to a different capacity.
// Assumes that lock is already held for write.
func (w *watchCache) doCacheResizeLocked(capacity int) {
	newCache := make([]*watchCacheEvent, capacity)
	if capacity < w.capacity {
		// adjust startIndex if cache capacity shrinks.
		w.startIndex = w.endIndex - capacity
	}
	for i := w.startIndex; i < w.endIndex; i++ {
		newCache[i%capacity] = w.cache[i%w.capacity]
	}
	w.cache = newCache
	metrics.RecordsWatchCacheCapacityChange(w.groupResource.String(), w.capacity, capacity)
	w.capacity = capacity
}

func (w *watchCache) UpdateResourceVersion(resourceVersion string) {
	rv, err := w.versioner.ParseResourceVersion(resourceVersion)
	if err != nil {
		klog.Errorf("Couldn't parse resourceVersion: %v", err)
		return
	}

	func() {
		w.Lock()
		defer w.Unlock()
		w.resourceVersion = rv
		w.cond.Broadcast()
	}()

	// Avoid calling event handler under lock.
	// This is safe as long as there is at most one call to Add/Update/Delete and
	// UpdateResourceVersion in flight at any point in time, which is true now,
	// because reflector calls them synchronously from its main thread.
	if w.eventHandler != nil {
		wcEvent := &watchCacheEvent{
			Type:            watch.Bookmark,
			ResourceVersion: rv,
		}
		w.eventHandler(wcEvent)
	}
	metrics.RecordResourceVersion(w.groupResource.String(), rv)
}

// List returns a list of pointers to <storeElement> objects.
func (w *watchCache) List() []interface{} {
	return w.store.List()
}

// waitUntilFreshAndBlock waits until cache is at least as fresh as given <resourceVersion>.
// NOTE: This function acquires the lock and doesn't release it.
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(ctx context.Context, resourceVersion uint64) error {
	startTime := w.clock.Now()
	defer func() {
		if resourceVersion > 0 {
			metrics.WatchCacheReadWait.WithContext(ctx).WithLabelValues(w.groupResource.String()).Observe(w.clock.Since(startTime).Seconds())
		}
	}()

	// In case resourceVersion is 0, we accept arbitrarily stale result.
	// As a result, the condition in the below for loop will never be
	// satisfied (w.resourceVersion is never negative), this call will
	// never hit the w.cond.Wait().
	// As a result - we can optimize the code by not firing the wakeup
	// function (and avoid starting a goroutine), especially given that
	// resourceVersion=0 is the most common case.
	if resourceVersion > 0 {
		go func() {
			// Wake us up when the time limit has expired. The docs
			// promise that time.After (well, NewTimer, which it calls)
			// will wait *at least* the duration given. Since this go
			// routine starts sometime after we record the start time, and
			// it will wake up the loop below sometime after the broadcast,
			// we don't need to worry about waking it up before the time
			// has expired accidentally.
			<-w.clock.After(blockTimeout)
			w.cond.Broadcast()
		}()
	}

	w.RLock()
	span := tracing.SpanFromContext(ctx)
	span.AddEvent("watchCache lock acquired")
	for w.resourceVersion < resourceVersion {
		if w.clock.Since(startTime) >= blockTimeout {
			// Request that the client retry after 'resourceVersionTooHighRetrySeconds' seconds.
			return storage.NewTooLargeResourceVersionError(resourceVersion, w.resourceVersion, resourceVersionTooHighRetrySeconds)
		}
		w.cond.Wait()
	}
	span.AddEvent("watchCache fresh enough")
	return nil
}

type sortableStoreElements []interface{}

func (s sortableStoreElements) Len() int {
	return len(s)
}

func (s sortableStoreElements) Less(i, j int) bool {
	return s[i].(*storeElement).Key < s[j].(*storeElement).Key
}

func (s sortableStoreElements) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// WaitUntilFreshAndList returns a list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, opts storage.ListOptions) (resp listResp, index string, err error) {
	if delegator.ConsistentReadSupported() && w.notFresh(resourceVersion) {
		w.waitingUntilFresh.Add()
		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
		w.waitingUntilFresh.Remove()
	} else {
		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
	}

	defer w.RUnlock()
	if err != nil {
		return listResp{}, "", err
	}
	return w.list(ctx, resourceVersion, key, opts)
}

// NOTICE: Structure follows the shouldDelegateList function in
// staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go
func (w *watchCache) list(ctx context.Context, resourceVersion uint64, key string, opts storage.ListOptions) (resp listResp, index string, err error) {
	switch opts.ResourceVersionMatch {
	case metav1.ResourceVersionMatchExact:
		return w.listExactRV(key, "", resourceVersion)
	case metav1.ResourceVersionMatchNotOlderThan:
	case "":
		// Continue
		if len(opts.Predicate.Continue) > 0 {
			continueKey, continueRV, err := storage.DecodeContinue(opts.Predicate.Continue, key)
			if err != nil {
				return listResp{}, "", errors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
			}
			if continueRV > 0 {
				return w.listExactRV(key, continueKey, uint64(continueRV))
			} else {
				// Continue with negative RV is a consistent read - already handled via waitUntilFreshAndBlock.
				// Don't pass matchValues as they don't support continueKey
				return w.listLatestRV(key, continueKey, nil)
			}
		}
		// Legacy exact match
		if opts.Predicate.Limit > 0 && len(opts.ResourceVersion) > 0 && opts.ResourceVersion != "0" {
			return w.listExactRV(key, "", resourceVersion)
		}
		// Consistent Read - already handled via waitUntilFreshAndBlock
	}
	return w.listLatestRV(key, "", opts.Predicate.MatcherIndex(ctx))
}

func (w *watchCache) listExactRV(key, continueKey string, resourceVersion uint64) (resp listResp, index string, err error) {
	if w.snapshots == nil {
		return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
	}
	store, ok := w.snapshots.GetLessOrEqual(resourceVersion)
	if !ok {
		return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
	}
	items := store.ListPrefix(key, continueKey)
	return listResp{
		Items:           items,
		ResourceVersion: resourceVersion,
	}, "", nil
}

func (w *watchCache) listLatestRV(key, continueKey string, matchValues []storage.MatchValue) (resp listResp, index string, err error) {
	// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
	// requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we
	// want - they will be filtered out later. The fact that we return fewer things is only a further performance improvement.
	// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
	for _, matchValue := range matchValues {
		if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
			result, err = filterPrefixAndOrder(key, result)
			return listResp{
				Items:           result,
				ResourceVersion: w.resourceVersion,
			}, matchValue.IndexName, err
		}
	}
	if store, ok := w.store.(orderedLister); ok {
		result := store.ListPrefix(key, continueKey)
		return listResp{
			Items:           result,
			ResourceVersion: w.resourceVersion,
		}, "", nil
	}
	result := w.store.List()
	result, err = filterPrefixAndOrder(key, result)
	return listResp{
		Items:           result,
		ResourceVersion: w.resourceVersion,
	}, "", err
}

func filterPrefixAndOrder(prefix string, items []interface{}) ([]interface{}, error) {
	var result []interface{}
	for _, item := range items {
		elem, ok := item.(*storeElement)
		if !ok {
			return nil, fmt.Errorf("non *storeElement returned from storage: %v", item)
		}
		if !hasPathPrefix(elem.Key, prefix) {
			continue
		}
		result = append(result, item)
	}
	sort.Sort(sortableStoreElements(result))
	return result, nil
}

func (w *watchCache) notFresh(resourceVersion uint64) bool {
	w.RLock()
	defer w.RUnlock()
	return resourceVersion > w.resourceVersion
}

// WaitUntilFreshAndGet returns a pointer to a <storeElement> object.
func (w *watchCache) WaitUntilFreshAndGet(ctx context.Context, resourceVersion uint64, key string) (interface{}, bool, uint64, error) {
	var err error
	if delegator.ConsistentReadSupported() && w.notFresh(resourceVersion) {
		w.waitingUntilFresh.Add()
		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
		w.waitingUntilFresh.Remove()
	} else {
		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
	}
	defer w.RUnlock()
	if err != nil {
		return nil, false, 0, err
	}
	value, exists, err := w.store.GetByKey(key)
	return value, exists, w.resourceVersion, err
}

func (w *watchCache) ListKeys() []string {
	return w.store.ListKeys()
}

// Get takes runtime.Object as a parameter. However, it returns a
// pointer to <storeElement>.
func (w *watchCache) Get(obj interface{}) (interface{}, bool, error) {
	object, ok := obj.(runtime.Object)
	if !ok {
		return nil, false, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj)
	}
	key, err := w.keyFunc(object)
	if err != nil {
		return nil, false, fmt.Errorf("couldn't compute key: %v", err)
	}

	return w.store.Get(&storeElement{Key: key, Object: object})
}

// GetByKey returns a pointer to <storeElement>.
func (w *watchCache) GetByKey(key string) (interface{}, bool, error) {
	return w.store.GetByKey(key)
}

// Replace takes a slice of runtime.Object as a parameter.
func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error {
	version, err := w.versioner.ParseResourceVersion(resourceVersion)
	if err != nil {
		return err
	}

	toReplace := make([]interface{}, 0, len(objs))
	for _, obj := range objs {
		object, ok := obj.(runtime.Object)
		if !ok {
			return fmt.Errorf("didn't get runtime.Object for replace: %#v", obj)
		}
		key, err := w.keyFunc(object)
		if err != nil {
			return fmt.Errorf("couldn't compute key: %v", err)
		}
		objLabels, objFields, err := w.getAttrsFunc(object)
		if err != nil {
			return err
		}
		toReplace = append(toReplace, &storeElement{
			Key:    key,
			Object: object,
			Labels: objLabels,
			Fields: objFields,
		})
	}

	w.Lock()
	defer w.Unlock()

	// Ensure startIndex never decreases, so that existing watchCacheInterval
	// instances get "invalid" errors if they try to download from the buffer
	// using their own start/end indexes calculated from previous buffer
	// content.

	// Empty the cyclic buffer, ensuring startIndex doesn't decrease.
	w.startIndex = w.endIndex
	w.removedEventSinceRelist = false

	if err := w.store.Replace(toReplace, resourceVersion); err != nil {
		return err
	}
	if w.snapshots != nil {
		w.snapshots.Reset()
		if orderedLister, ordered := w.store.(orderedLister); ordered {
			w.snapshots.Add(version, orderedLister)
		}
	}
	w.listResourceVersion = version
	w.resourceVersion = version
	if w.onReplace != nil {
		w.onReplace()
	}
	w.cond.Broadcast()

	metrics.RecordResourceVersion(w.groupResource.String(), version)
	klog.V(3).Infof("Replaced watchCache (rev: %v)", resourceVersion)
	return nil
}

func (w *watchCache) SetOnReplace(onReplace func()) {
	w.Lock()
	defer w.Unlock()
	w.onReplace = onReplace
}

func (w *watchCache) Resync() error {
	// Nothing to do
	return nil
}

func (w *watchCache) getListResourceVersion() uint64 {
	w.RLock()
	defer w.RUnlock()
	return w.listResourceVersion
}

func (w *watchCache) currentCapacity() int {
	w.RLock()
	defer w.RUnlock()
	return w.capacity
}

const (
	// minWatchChanSize is the min size of channels used by the watch.
	// We keep that set to 10 for "backward compatibility" until we
	// convince ourselves based on some metrics that decreasing is safe.
	minWatchChanSize = 10
	// maxWatchChanSizeWithIndexAndTrigger is the max size of the channel
	// used by the watch using the index and trigger selector.
	maxWatchChanSizeWithIndexAndTrigger = 10
	// maxWatchChanSizeWithIndexWithoutTrigger is the max size of the channel
	// used by the watch using the index but without triggering selector.
	// We keep that set to 1000 for "backward compatibility", until we
	// convince ourselves based on some metrics that decreasing is safe.
	maxWatchChanSizeWithIndexWithoutTrigger = 1000
	// maxWatchChanSizeWithoutIndex is the max size of the channel
	// used by the watch not using the index.
	// TODO(wojtek-t): Figure out if the value shouldn't be higher.
	maxWatchChanSizeWithoutIndex = 100
)

func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) int {
	// To estimate the channel size we use a heuristic that a channel
	// should roughly be able to keep one second of history.
	// We don't have exact data, but given we store updates from
	// the last <eventFreshDuration>, we approach it by dividing the
	// capacity by the length of the history window.
	chanSize := int(math.Ceil(float64(w.currentCapacity()) / w.eventFreshDuration.Seconds()))

	// Finally we adjust the size to avoid ending with too low or
	// too large values.
	if chanSize < minWatchChanSize {
		chanSize = minWatchChanSize
	}
	var maxChanSize int
	switch {
	case indexExists && triggerUsed:
		maxChanSize = maxWatchChanSizeWithIndexAndTrigger
	case indexExists && !triggerUsed:
		maxChanSize = maxWatchChanSizeWithIndexWithoutTrigger
	case !indexExists:
		maxChanSize = maxWatchChanSizeWithoutIndex
	}
	if chanSize > maxChanSize {
		chanSize = maxChanSize
	}
	return chanSize
}
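// Illustrative sketch (not part of the vendored file), assuming the upstream
// default eventFreshDuration of 75s: with the default capacity of 100,
// ceil(100/75) = 2 falls below minWatchChanSize, so the suggestion is 10.
// At the default upper-bound capacity of 102400, ceil(102400/75) = 1366 is
// clamped to 1000 for an indexed watch without a trigger, and to 100 for an
// unindexed watch.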
// isIndexValidLocked checks if a given index is still valid.
// This assumes that the lock is held.
func (w *watchCache) isIndexValidLocked(index int) bool {
	return index >= w.startIndex
}

// getAllEventsSinceLocked returns a watchCacheInterval that can be used to
// retrieve events since a certain resourceVersion. This function assumes
// that it is called under the watchCache lock.
func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64, key string, opts storage.ListOptions) (*watchCacheInterval, error) {
	_, matchesSingle := opts.Predicate.MatchesSingle()
	matchesSingle = matchesSingle && !opts.Recursive
	if opts.SendInitialEvents != nil && *opts.SendInitialEvents {
		return w.getIntervalFromStoreLocked(key, matchesSingle)
	}

	size := w.endIndex - w.startIndex
	var oldest uint64
	switch {
	case w.listResourceVersion > 0 && !w.removedEventSinceRelist:
		// If no event was removed from the buffer since last relist, the oldest watch
		// event we can deliver is one greater than the resource version of the list.
		oldest = w.listResourceVersion + 1
	case size > 0:
		// If the previous condition is not satisfied: either some event was already
		// removed from the buffer or we've never completed a list (the latter can
		// only happen in unit tests that populate the buffer without performing
		// list/replace operations), the oldest watch event we can deliver is the first
		// one in the buffer.
		oldest = w.cache[w.startIndex%w.capacity].ResourceVersion
	default:
		return nil, fmt.Errorf("watch cache isn't correctly initialized")
	}

	if resourceVersion == 0 {
		if opts.SendInitialEvents == nil {
			// resourceVersion = 0 means that we don't require any specific starting point
			// and we would like to start watching from ~now.
			// However, to keep backward compatibility, we additionally need to return the
			// current state and only then start watching from that point.
			//
			// TODO: In v2 api, we should stop returning the current state - #13969.
			return w.getIntervalFromStoreLocked(key, matchesSingle)
		}
		// SendInitialEvents = false and resourceVersion = 0
		// means that the request would like to start watching
		// from Any resourceVersion
		resourceVersion = w.resourceVersion
	}
	if resourceVersion < oldest-1 {
		return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
	}

	// Binary search the smallest index at which resourceVersion is greater than the given one.
	f := func(i int) bool {
		return w.cache[(w.startIndex+i)%w.capacity].ResourceVersion > resourceVersion
	}
	first := sort.Search(size, f)
	indexerFunc := func(i int) *watchCacheEvent {
		return w.cache[i%w.capacity]
	}
	ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, resourceVersion, w.RWMutex.RLocker())
	return ci, nil
}

// getIntervalFromStoreLocked returns a watchCacheInterval
// that covers the entire storage state.
// This function assumes that it is called under the watchCache lock.
func (w *watchCache) getIntervalFromStoreLocked(key string, matchesSingle bool) (*watchCacheInterval, error) {
	ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, key, matchesSingle)
	if err != nil {
		return nil, err
	}
	return ci, nil
}
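// Illustrative sketch (not part of the vendored file): the sort.Search call
// above finds the first cached event with ResourceVersion strictly greater
// than the requested one. With cached RVs [5, 7, 9] and resourceVersion = 6,
// first == 1, so the interval starts at the RV-7 event and the watcher never
// re-receives events at or below RV 6.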
255
e2e/vendor/k8s.io/apiserver/pkg/storage/cacher/watch_cache_interval.go
generated
vendored
@ -1,255 +0,0 @@
/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cacher

import (
	"fmt"
	"sort"
	"sync"

	"k8s.io/apimachinery/pkg/watch"
)

// watchCacheInterval serves as an abstraction over a source
// of watchCacheEvents. It maintains a window of events over
// an underlying source and these events can be served using
// the exposed Next() API. The main intent for doing things
// this way is to introduce an upper bound of memory usage
// for starting a watch and reduce the maximum possible time
// interval for which the lock would be held while events are
// copied over.
//
// The source of events for the interval is typically either
// the watchCache circular buffer, if events being retrieved
// need to be for resource versions > 0 or the underlying
// implementation of Store, if resource version = 0.
//
// Furthermore, an interval can be either valid or invalid at
// any given point of time. The notion of validity makes sense
// only in cases where the window of events in the underlying
// source can change over time - i.e. for watchCache circular
// buffer. When the circular buffer is full and an event needs
// to be popped off, watchCache::startIndex is incremented. In
// this case, an interval tracking that popped event is valid
// only if it has already been copied to its internal buffer.
// However, for efficiency we perform that lazily and we mark
// an interval as invalid iff we need to copy events from the
// watchCache and we end up needing events that have already
// been popped off. This translates to the following condition:
//
//	watchCacheInterval::startIndex >= watchCache::startIndex.
//
// When this condition becomes false, the interval is no longer
// valid and should not be used to retrieve and serve elements
// from the underlying source.
type watchCacheInterval struct {
	// startIndex denotes the starting point of the interval
	// being considered. The value is the index in the actual
	// source of watchCacheEvents. If the source of events is
	// the watchCache, then this must be used modulo capacity.
	startIndex int

	// endIndex denotes the ending point of the interval being
	// considered. The value is the index in the actual source
	// of events. If the source of the events is the watchCache,
	// then this should be used modulo capacity.
	endIndex int

	// indexer is meant to inject behaviour for how an event must
	// be retrieved from the underlying source given an index.
	indexer indexerFunc

	// indexValidator is used to check if a given index is still
	// valid. If it is deemed that the index is not
	// valid, then this interval can no longer be used to serve
	// events. Use of indexValidator is warranted only in cases
	// where the window of events in the underlying source can
	// change over time. Furthermore, an interval is invalid if
	// its startIndex no longer coincides with the startIndex of
	// the underlying source.
	indexValidator indexValidator

	// buffer holds watchCacheEvents that this interval returns on
	// a call to Next(). This exists mainly to reduce acquiring the
	// lock on each invocation of Next().
	buffer *watchCacheIntervalBuffer

	// resourceVersion is the resourceVersion from which
	// the interval was constructed.
	resourceVersion uint64

	// lock effectively protects access to the underlying source
	// of events through - indexer and indexValidator.
	//
	// Given that indexer and indexValidator only read state, if
	// possible, Locker obtained through RLocker() is provided.
	lock sync.Locker

	// initialEventsEndBookmark will be sent after sending all events in cacheInterval
	initialEventsEndBookmark *watchCacheEvent
}

type indexerFunc func(int) *watchCacheEvent
type indexValidator func(int) bool

func newCacheInterval(startIndex, endIndex int, indexer indexerFunc, indexValidator indexValidator, resourceVersion uint64, locker sync.Locker) *watchCacheInterval {
	return &watchCacheInterval{
		startIndex:      startIndex,
		endIndex:        endIndex,
		indexer:         indexer,
		indexValidator:  indexValidator,
		buffer:          &watchCacheIntervalBuffer{buffer: make([]*watchCacheEvent, bufferSize)},
		resourceVersion: resourceVersion,
		lock:            locker,
	}
}

type sortableWatchCacheEvents []*watchCacheEvent

func (s sortableWatchCacheEvents) Len() int {
	return len(s)
}

func (s sortableWatchCacheEvents) Less(i, j int) bool {
	return s[i].Key < s[j].Key
}

func (s sortableWatchCacheEvents) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// newCacheIntervalFromStore is meant to handle the case of rv=0, such that the events
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
// The items returned in the interval will be sorted by Key.
func newCacheIntervalFromStore(resourceVersion uint64, store storeIndexer, key string, matchesSingle bool) (*watchCacheInterval, error) {
	buffer := &watchCacheIntervalBuffer{}
	var allItems []interface{}

	if matchesSingle {
		item, exists, err := store.GetByKey(key)
		if err != nil {
			return nil, err
		}

		if exists {
			allItems = append(allItems, item)
		}
	} else {
		allItems = store.List()
	}
	buffer.buffer = make([]*watchCacheEvent, len(allItems))
	for i, item := range allItems {
		elem, ok := item.(*storeElement)
		if !ok {
			return nil, fmt.Errorf("not a storeElement: %v", elem)
		}
		buffer.buffer[i] = &watchCacheEvent{
			Type:            watch.Added,
			Object:          elem.Object,
			ObjLabels:       elem.Labels,
			ObjFields:       elem.Fields,
			Key:             elem.Key,
			ResourceVersion: resourceVersion,
		}
		buffer.endIndex++
	}
	sort.Sort(sortableWatchCacheEvents(buffer.buffer))
	ci := &watchCacheInterval{
		startIndex: 0,
		// Simulate that we already have all the events we're looking for.
		endIndex:        0,
		buffer:          buffer,
		resourceVersion: resourceVersion,
	}

	return ci, nil
}

// Next returns the next item in the cache interval provided the cache
// interval is still valid. An error is returned if the interval is
// invalidated.
func (wci *watchCacheInterval) Next() (*watchCacheEvent, error) {
	// if there are items in the buffer to return, return from
	// the buffer.
	if event, exists := wci.buffer.next(); exists {
		return event, nil
	}
	// check if there are still other events in this interval
	// that can be processed.
	if wci.startIndex >= wci.endIndex {
		return nil, nil
	}
	wci.lock.Lock()
	defer wci.lock.Unlock()

	if valid := wci.indexValidator(wci.startIndex); !valid {
		return nil, fmt.Errorf("cache interval invalidated, interval startIndex: %d", wci.startIndex)
	}

	wci.fillBuffer()
	if event, exists := wci.buffer.next(); exists {
		return event, nil
	}
	return nil, nil
}

func (wci *watchCacheInterval) fillBuffer() {
	wci.buffer.startIndex = 0
	wci.buffer.endIndex = 0
	for wci.startIndex < wci.endIndex && !wci.buffer.isFull() {
		event := wci.indexer(wci.startIndex)
		if event == nil {
			break
		}
		wci.buffer.buffer[wci.buffer.endIndex] = event
		wci.buffer.endIndex++
		wci.startIndex++
	}
}

const bufferSize = 100

// watchCacheIntervalBuffer is used to reduce acquiring
// the lock on each invocation of watchCacheInterval.Next().
type watchCacheIntervalBuffer struct {
	// buffer is used to hold watchCacheEvents that
	// the interval returns on a call to Next().
	buffer []*watchCacheEvent
	// The first element of buffer is defined by startIndex,
	// its last element is defined by endIndex.
	startIndex int
	endIndex   int
}

// next returns the next event present in the interval buffer provided
// it is not empty.
func (wcib *watchCacheIntervalBuffer) next() (*watchCacheEvent, bool) {
	if wcib.isEmpty() {
		return nil, false
	}
	next := wcib.buffer[wcib.startIndex]
	wcib.startIndex++
	return next, true
}

func (wcib *watchCacheIntervalBuffer) isFull() bool {
	return wcib.endIndex >= bufferSize
}

func (wcib *watchCacheIntervalBuffer) isEmpty() bool {
	return wcib.startIndex == wcib.endIndex
}
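// Illustrative sketch (not part of the vendored file): a consumer drains an
// interval until it is exhausted or invalidated, copying at most bufferSize
// events per lock acquisition.
//
//	for {
//		event, err := interval.Next()
//		if err != nil {
//			// Interval invalidated: the circular buffer rotated past our
//			// position, so the watcher must be closed (the client re-lists).
//			return err
//		}
//		if event == nil {
//			break // interval exhausted
//		}
//		// deliver event to the watcher's result channel
//	}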
18
e2e/vendor/k8s.io/apiserver/pkg/storage/errors/doc.go
generated
vendored
@ -1,18 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package errors provides conversion of storage errors to API errors.
package errors
128
e2e/vendor/k8s.io/apiserver/pkg/storage/errors/storage.go
generated
vendored
@ -1,128 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package errors

import (
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apiserver/pkg/storage"
)

// InterpretListError converts a generic error on a retrieval
// operation into the appropriate API error.
func InterpretListError(err error, qualifiedResource schema.GroupResource) error {
	switch {
	case storage.IsNotFound(err):
		return errors.NewNotFound(qualifiedResource, "")
	case storage.IsUnreachable(err), storage.IsRequestTimeout(err):
		return errors.NewServerTimeout(qualifiedResource, "list", 2) // TODO: make configurable or handled at a higher level
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}

// InterpretGetError converts a generic error on a retrieval
// operation into the appropriate API error.
func InterpretGetError(err error, qualifiedResource schema.GroupResource, name string) error {
	switch {
	case storage.IsNotFound(err):
		return errors.NewNotFound(qualifiedResource, name)
	case storage.IsUnreachable(err):
		return errors.NewServerTimeout(qualifiedResource, "get", 2) // TODO: make configurable or handled at a higher level
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}

// InterpretCreateError converts a generic error on a create
// operation into the appropriate API error.
func InterpretCreateError(err error, qualifiedResource schema.GroupResource, name string) error {
	switch {
	case storage.IsExist(err):
		return errors.NewAlreadyExists(qualifiedResource, name)
	case storage.IsUnreachable(err):
		return errors.NewServerTimeout(qualifiedResource, "create", 2) // TODO: make configurable or handled at a higher level
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}

// InterpretUpdateError converts a generic error on an update
// operation into the appropriate API error.
func InterpretUpdateError(err error, qualifiedResource schema.GroupResource, name string) error {
	switch {
	case storage.IsConflict(err), storage.IsExist(err), storage.IsInvalidObj(err):
		return errors.NewConflict(qualifiedResource, name, err)
	case storage.IsUnreachable(err):
		return errors.NewServerTimeout(qualifiedResource, "update", 2) // TODO: make configurable or handled at a higher level
	case storage.IsNotFound(err):
		return errors.NewNotFound(qualifiedResource, name)
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}

// InterpretDeleteError converts a generic error on a delete
// operation into the appropriate API error.
func InterpretDeleteError(err error, qualifiedResource schema.GroupResource, name string) error {
	switch {
	case storage.IsNotFound(err):
		return errors.NewNotFound(qualifiedResource, name)
	case storage.IsUnreachable(err):
		return errors.NewServerTimeout(qualifiedResource, "delete", 2) // TODO: make configurable or handled at a higher level
	case storage.IsConflict(err), storage.IsExist(err), storage.IsInvalidObj(err):
		return errors.NewConflict(qualifiedResource, name, err)
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}

// InterpretWatchError converts a generic error on a watch
// operation into the appropriate API error.
func InterpretWatchError(err error, resource schema.GroupResource, name string) error {
	switch {
	case storage.IsInvalidError(err):
		invalidError, _ := err.(storage.InvalidError)
		return errors.NewInvalid(schema.GroupKind{Group: resource.Group, Kind: resource.Resource}, name, invalidError.Errs)
	case storage.IsInternalError(err):
		return errors.NewInternalError(err)
	case storage.IsCorruptObject(err):
		return errors.NewInternalError(err)
	default:
		return err
	}
}
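// Illustrative sketch (not part of the vendored file): a registry-style
// caller maps raw storage errors onto API errors before returning them to
// the handler (the "pods" resource and variable names are hypothetical):
//
//	if err := store.Get(ctx, key, opts, obj); err != nil {
//		return nil, InterpretGetError(err, schema.GroupResource{Resource: "pods"}, name)
//	}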
7
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/OWNERS
generated
vendored
@ -1,7 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

reviewers:
  - wojtek-t
  - serathius
labels:
  - sig/etcd
162
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/compact.go
generated
vendored
@ -1,162 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"strconv"
	"sync"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
	"k8s.io/klog/v2"
)

const (
	compactRevKey = "compact_rev_key"
)

var (
	endpointsMapMu sync.Mutex
	endpointsMap   map[string]struct{}
)

func init() {
	endpointsMap = make(map[string]struct{})
}

// StartCompactor starts a compactor in the background to compact old versions of keys that are no longer needed.
// By default, we keep the most recent 5 minutes of data and compact versions older than 5 minutes.
// That should be enough for slow watchers and to tolerate bursts.
// TODO: We might keep a longer history (12h) in the future once the storage API can take advantage of past versions of keys.
func StartCompactor(ctx context.Context, client *clientv3.Client, compactInterval time.Duration) {
	endpointsMapMu.Lock()
	defer endpointsMapMu.Unlock()

	// In one process, we can have only one compactor for one cluster.
	// Currently we rely on endpoints to differentiate clusters.
	for _, ep := range client.Endpoints() {
		if _, ok := endpointsMap[ep]; ok {
			klog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
			return
		}
	}
	for _, ep := range client.Endpoints() {
		endpointsMap[ep] = struct{}{}
	}

	if compactInterval != 0 {
		go compactor(ctx, client, compactInterval)
	}
}
// compactor periodically compacts historical versions of keys in etcd.
|
||||
// It will compact keys with versions older than given interval.
|
||||
// In other words, after compaction, it will only contain keys set during last interval.
|
||||
// Any API call for the older versions of keys will return error.
|
||||
// Interval is the time interval between each compaction. The first compaction happens after "interval".
|
||||
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
|
||||
// Technical definitions:
|
||||
// We have a special key in etcd defined as *compactRevKey*.
|
||||
// compactRevKey's value will be set to the string of last compacted revision.
|
||||
// compactRevKey's version will be used as logical time for comparison. THe version is referred as compact time.
|
||||
// Initially, because the key doesn't exist, the compact time (version) is 0.
|
||||
//
|
||||
// Algorithm:
|
||||
// - Compare to see if (local compact_time) = (remote compact_time).
|
||||
// - If yes, increment both local and remote compact_time, and do a compaction.
|
||||
// - If not, set local to remote compact_time.
|
||||
//
|
||||
// Technical details/insights:
|
||||
//
|
||||
// The protocol here is lease based. If one compactor CAS successfully, the others would know it when they fail in
|
||||
// CAS later and would try again in 5 minutes. If an APIServer crashed, another one would "take over" the lease.
|
||||
//
|
||||
// For example, in the following diagram, we have a compactor C1 doing compaction in t1, t2. Another compactor C2
|
||||
// at t1' (t1 < t1' < t2) would CAS fail, set its known oldRev to rev at t1', and try again in t2' (t2' > t2).
|
||||
// If C1 crashed and wouldn't compact at t2, C2 would CAS successfully at t2'.
|
||||
//
|
||||
// oldRev(t2) curRev(t2)
|
||||
// +
|
||||
// oldRev curRev |
|
||||
// + + |
|
||||
// | | |
|
||||
// | | t1' | t2'
|
||||
// +---v-------------v----^---------v------^---->
|
||||
// t0 t1 t2
|
||||
//
|
||||
// We have the guarantees:
|
||||
// - in normal cases, the interval is 5 minutes.
|
||||
// - in failover, the interval is >5m and <10m
|
||||
//
|
||||
// FAQ:
|
||||
// - What if time is not accurate? We don't care as long as someone did the compaction. Atomicity is ensured using
|
||||
// etcd API.
|
||||
// - What happened under heavy load scenarios? Initially, each apiserver will do only one compaction
|
||||
// every 5 minutes. This is very unlikely affecting or affected w.r.t. server load.
|
||||
|
||||
var compactTime int64
|
||||
var rev int64
|
||||
var err error
|
||||
for {
|
||||
select {
|
||||
case <-time.After(interval):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
compactTime, rev, err = compact(ctx, client, compactTime, rev)
|
||||
if err != nil {
|
||||
klog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compact compacts etcd store and returns current rev.
|
||||
// It will return the current compact time and global revision if no error occurred.
|
||||
// Note that CAS fail will not incur any error.
|
||||
func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) {
|
||||
resp, err := client.KV.Txn(ctx).If(
|
||||
clientv3.Compare(clientv3.Version(compactRevKey), "=", t),
|
||||
).Then(
|
||||
clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version
|
||||
).Else(
|
||||
clientv3.OpGet(compactRevKey),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return t, rev, err
|
||||
}
|
||||
|
||||
curRev := resp.Header.Revision
|
||||
|
||||
if !resp.Succeeded {
|
||||
curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version
|
||||
return curTime, curRev, nil
|
||||
}
|
||||
curTime := t + 1
|
||||
|
||||
if rev == 0 {
|
||||
// We don't compact on bootstrap.
|
||||
return curTime, curRev, nil
|
||||
}
|
||||
if _, err = client.Compact(ctx, rev); err != nil {
|
||||
return curTime, curRev, err
|
||||
}
|
||||
klog.V(4).Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints())
|
||||
return curTime, curRev, nil
|
||||
}
|
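A minimal start-up sketch of the compactor, assuming an etcd endpoint at 127.0.0.1:2379 and a cancellable ctx (both illustrative, not part of the vendored file):

	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}})
	if err != nil {
		klog.Fatalf("failed to create etcd client: %v", err)
	}
	defer cli.Close()
	// One compactor per set of endpoints per process; repeated calls for the
	// same endpoints are deduplicated inside StartCompactor.
	StartCompactor(ctx, cli, 5*time.Minute)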
270
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/corrupt_obj_deleter.go
generated
vendored
@@ -1,270 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"strings"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/klog/v2"
)

// NewStoreWithUnsafeCorruptObjectDeletion wraps the given store implementation
// and adds support for unsafe deletion of corrupt objects
func NewStoreWithUnsafeCorruptObjectDeletion(delegate storage.Interface, gr schema.GroupResource) storage.Interface {
	return &corruptObjectDeleter{
		Interface:     delegate,
		groupResource: gr,
	}
}

// WithCorruptObjErrorHandlingDecoder decorates the given decoder; it determines
// whether the error returned by the given decoder represents a corrupt object (the
// object is undecodable), and then wraps the error appropriately so the
// unsafe deleter can determine if the object is a candidate for unsafe deletion
func WithCorruptObjErrorHandlingDecoder(decoder Decoder) Decoder {
	return &corruptObjErrorInterpretingDecoder{Decoder: decoder}
}

// WithCorruptObjErrorHandlingTransformer decorates the given transformer; it
// determines whether the error returned by the given transformer represents a
// corrupt object (the data from the storage is untransformable), and then
// wraps the error appropriately so the unsafe deleter can determine
// if the object is a candidate for unsafe deletion
func WithCorruptObjErrorHandlingTransformer(transformer value.Transformer) value.Transformer {
	return &corruptObjErrorInterpretingTransformer{Transformer: transformer}
}

// corruptObjErrAggregatorFactory returns an error aggregator that aggregates
// corrupt object error(s) that the list operation encounters while
// retrieving objects from the storage.
// maxCount is the maximum number of errors that will be aggregated.
func corruptObjErrAggregatorFactory(maxCount int) func() ListErrorAggregator {
	if maxCount <= 0 {
		return defaultListErrorAggregatorFactory
	}
	return func() ListErrorAggregator {
		return &corruptObjErrAggregator{maxCount: maxCount}
	}
}

var errTooMany = errors.New("too many errors, the list is truncated")

// aggregate corrupt object errors from the LIST operation
type corruptObjErrAggregator struct {
	errs     []error
	abortErr error
	maxCount int
}

func (a *corruptObjErrAggregator) Aggregate(key string, err error) bool {
	if len(a.errs) >= a.maxCount {
		// add a sentinel error to indicate there are more
		a.errs = append(a.errs, errTooMany)
		return true
	}
	var corruptObjErr *corruptObjectError
	if errors.As(err, &corruptObjErr) {
		a.errs = append(a.errs, storage.NewCorruptObjError(key, corruptObjErr))
		return false
	}

	// not a corrupt object error, the list operation should abort
	a.abortErr = err
	return true
}

func (a *corruptObjErrAggregator) Err() error {
	switch {
	case len(a.errs) == 0 && a.abortErr != nil:
		return a.abortErr
	case len(a.errs) > 0:
		err := utilerrors.NewAggregate(a.errs)
		return &aggregatedStorageError{errs: err, resourcePrefix: "list"}
	default:
		return nil
	}
}

// corruptObjectDeleter facilitates unsafe deletion of corrupt objects for etcd
type corruptObjectDeleter struct {
	storage.Interface
	groupResource schema.GroupResource
}

func (s *corruptObjectDeleter) Get(ctx context.Context, key string, opts storage.GetOptions, out runtime.Object) error {
	if err := s.Interface.Get(ctx, key, opts, out); err != nil {
		var corruptObjErr *corruptObjectError
		if !errors.As(err, &corruptObjErr) {
			// this error does not represent a corrupt object
			return err
		}
		// the unsafe deleter at the registry layer will check whether
		// the given err represents a corrupt object in order to
		// initiate the unsafe deletion flow.
		return storage.NewCorruptObjError(key, corruptObjErr)
	}
	return nil
}

func (s *corruptObjectDeleter) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
	err := s.Interface.GetList(ctx, key, opts, listObj)
	if err == nil {
		return nil
	}

	var aggregatedErr *aggregatedStorageError
	if errors.As(err, &aggregatedErr) {
		// we have aggregated a list of corrupt objects
		klog.V(5).ErrorS(aggregatedErr, "corrupt objects")
		return aggregatedErr.NewAPIStatusError(s.groupResource)
	}
	return err
}

// corruptObjErrorInterpretingDecoder wraps the error returned by the decorated decoder
type corruptObjErrorInterpretingDecoder struct {
	Decoder
}

func (d *corruptObjErrorInterpretingDecoder) Decode(value []byte, objPtr runtime.Object, rev int64) error {
	// TODO: right now any error is deemed undecodable; in
	// the future, we can apply some filter, if need be.
	if err := d.Decoder.Decode(value, objPtr, rev); err != nil {
		return &corruptObjectError{err: err, errType: undecodable, revision: rev}
	}
	return nil
}

// DecodeListItem decodes a bytes value in an array into an object.
func (d *corruptObjErrorInterpretingDecoder) DecodeListItem(ctx context.Context, data []byte, rev uint64, newItemFunc func() runtime.Object) (runtime.Object, error) {
	// TODO: right now any error is deemed undecodable; in
	// the future, we can apply some filter, if need be.
	obj, err := d.Decoder.DecodeListItem(ctx, data, rev, newItemFunc)
	if err != nil {
		err = &corruptObjectError{err: err, errType: undecodable, revision: int64(rev)}
	}
	return obj, err
}

// corruptObjErrorInterpretingTransformer wraps the error returned by the transformer
type corruptObjErrorInterpretingTransformer struct {
	value.Transformer
}

func (t *corruptObjErrorInterpretingTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	// TODO: right now any error is deemed untransformable; in the future, we
	// can apply some filter, if need be. For example, any network error
	out, stale, err := t.Transformer.TransformFromStorage(ctx, data, dataCtx)
	if err != nil {
		err = &corruptObjectError{err: err, errType: untransformable}
	}
	return out, stale, err
}

// corruptObjectError is used internally, only by the corrupt object
// deleter; this error represents a corrupt object:
//   a) the data from the storage failed to transform, or
//   b) the data failed to decode into an object
// NOTE: this error does not have any information to identify the object
// that is corrupt, for example the storage key associated with the object
type corruptObjectError struct {
	err      error
	errType  int
	revision int64
}

const (
	untransformable int = iota + 1
	undecodable
)

var typeToMessage = map[int]string{
	untransformable: "data from the storage is not transformable",
	undecodable:     "object not decodable",
}

func (e *corruptObjectError) Unwrap() error { return e.err }
func (e *corruptObjectError) Error() string {
	return fmt.Sprintf("%s revision=%d: %v", typeToMessage[e.errType], e.revision, e.err)
}

// aggregatedStorageError holds an aggregated list of storage.StorageError
type aggregatedStorageError struct {
	resourcePrefix string
	errs           utilerrors.Aggregate
}

func (e *aggregatedStorageError) Error() string {
	errs := e.errs.Errors()
	var b strings.Builder
	fmt.Fprintf(&b, "unable to transform or decode %d objects: {\n", len(errs))
	for _, err := range errs {
		fmt.Fprintf(&b, "\t%s\n", err.Error())
	}
	b.WriteString("}")
	return b.String()
}

// NewAPIStatusError creates a new APIStatus object from the
// aggregated list of StorageError
func (e *aggregatedStorageError) NewAPIStatusError(qualifiedResource schema.GroupResource) *apierrors.StatusError {
	var causes []metav1.StatusCause
	for _, err := range e.errs.Errors() {
		var storageErr *storage.StorageError
		if errors.As(err, &storageErr) {
			causes = append(causes, metav1.StatusCause{
				Type:  metav1.CauseTypeUnexpectedServerResponse,
				Field: storageErr.Key,
				// TODO: do we need to expose the internal error message here?
				Message: err.Error(),
			})
			continue
		}
		if errors.Is(err, errTooMany) {
			causes = append(causes, metav1.StatusCause{
				Type:    metav1.CauseTypeTooMany,
				Message: errTooMany.Error(),
			})
		}
	}

	return &apierrors.StatusError{
		ErrStatus: metav1.Status{
			Status: metav1.StatusFailure,
			Code:   http.StatusInternalServerError,
			Reason: metav1.StatusReasonStoreReadError,
			Details: &metav1.StatusDetails{
				Group:  qualifiedResource.Group,
				Kind:   qualifiedResource.Resource,
				Name:   e.resourcePrefix,
				Causes: causes,
			},
			Message: fmt.Sprintf("failed to read one or more %s from the storage", qualifiedResource.String()),
		},
	}
}
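A hedged composition sketch: the delegate store, codec, versioner, and base transformer below come from the surrounding store construction and are assumptions here, but the three decorators are the ones defined in this file:

	// Tag corruption at the decode and transform layers, then let the
	// wrapping store surface typed errors for the unsafe-deletion flow.
	decoder := WithCorruptObjErrorHandlingDecoder(NewDefaultDecoder(codec, versioner))
	transformer := WithCorruptObjErrorHandlingTransformer(baseTransformer)
	store := NewStoreWithUnsafeCorruptObjectDeletion(delegateStore, schema.GroupResource{Resource: "pods"})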
94
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/decoder.go
generated
vendored
@@ -1,94 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/conversion"
	"k8s.io/apimachinery/pkg/runtime"
	endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
	"k8s.io/apiserver/pkg/storage"

	"k8s.io/klog/v2"
)

// NewDefaultDecoder returns the default decoder for the etcd3 store
func NewDefaultDecoder(codec runtime.Codec, versioner storage.Versioner) Decoder {
	return &defaultDecoder{
		codec:     codec,
		versioner: versioner,
	}
}

// Decoder is used by the etcd storage implementation to decode
// transformed data from the storage into an object
type Decoder interface {
	// Decode decodes a bytes value into an object. It also
	// sets the object's resource version to rev.
	// On success, objPtr is set to the decoded object.
	Decode(value []byte, objPtr runtime.Object, rev int64) error

	// DecodeListItem decodes a bytes value in an array into an object.
	DecodeListItem(ctx context.Context, data []byte, rev uint64, newItemFunc func() runtime.Object) (runtime.Object, error)
}

var _ Decoder = &defaultDecoder{}

type defaultDecoder struct {
	codec     runtime.Codec
	versioner storage.Versioner
}

// Decode decodes a bytes value into an object. It also sets the object's resource version to rev.
// On success, objPtr is set to the decoded object.
func (d *defaultDecoder) Decode(value []byte, objPtr runtime.Object, rev int64) error {
	if _, err := conversion.EnforcePtr(objPtr); err != nil {
		// nolint:errorlint // this code was moved from store.go as is
		return fmt.Errorf("unable to convert output object to pointer: %v", err)
	}
	_, _, err := d.codec.Decode(value, nil, objPtr)
	if err != nil {
		return err
	}
	// being unable to set the version does not prevent the object from being extracted
	if err := d.versioner.UpdateObject(objPtr, uint64(rev)); err != nil {
		klog.Errorf("failed to update object version: %v", err)
	}
	return nil
}

// DecodeListItem decodes a bytes value in an array into an object.
func (d *defaultDecoder) DecodeListItem(ctx context.Context, data []byte, rev uint64, newItemFunc func() runtime.Object) (runtime.Object, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackDecodeLatency(ctx, time.Since(startedAt))
	}()

	obj, _, err := d.codec.Decode(data, nil, newItemFunc())
	if err != nil {
		return nil, err
	}

	if err := d.versioner.UpdateObject(obj, rev); err != nil {
		klog.Errorf("failed to update object version: %v", err)
	}

	return obj, nil
}
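A hedged usage fragment: the codec, versioner, newFunc, and raw etcd response are assumed to come from the surrounding store setup and are not defined in this file:

	dec := NewDefaultDecoder(codec, versioner)
	obj := newFunc() // e.g. a func() runtime.Object returning an empty Pod
	// Decode the stored payload and stamp its resourceVersion from ModRevision.
	if err := dec.Decode(getResp.Kvs[0].Value, obj, getResp.Kvs[0].ModRevision); err != nil {
		return err
	}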
89
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/errors.go
generated
vendored
@@ -1,89 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	goerrors "errors"
	"net/http"

	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apiserver/pkg/storage"

	etcdrpc "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

func interpretWatchError(err error) error {
	switch {
	case err == etcdrpc.ErrCompacted:
		return errors.NewResourceExpired("The resourceVersion for the provided watch is too old.")
	}

	var corruptobjDeletedErr *corruptObjectDeletedError
	if goerrors.As(err, &corruptobjDeletedErr) {
		return &errors.StatusError{
			ErrStatus: metav1.Status{
				Status:  metav1.StatusFailure,
				Code:    http.StatusInternalServerError,
				Reason:  metav1.StatusReasonStoreReadError,
				Message: corruptobjDeletedErr.Error(),
			},
		}
	}

	return err
}

const (
	expired         string = "The resourceVersion for the provided list is too old."
	continueExpired string = "The provided continue parameter is too old " +
		"to display a consistent list result. You can start a new list without " +
		"the continue parameter."
	inconsistentContinue string = "The provided continue parameter is too old " +
		"to display a consistent list result. You can start a new list without " +
		"the continue parameter, or use the continue token in this response to " +
		"retrieve the remainder of the results. Continuing with the provided " +
		"token results in an inconsistent list - objects that were created, " +
		"modified, or deleted between the time the first chunk was returned " +
		"and now may show up in the list."
)

func interpretListError(err error, paging bool, continueKey, keyPrefix string) error {
	switch {
	case err == etcdrpc.ErrCompacted:
		if paging {
			return handleCompactedErrorForPaging(continueKey, keyPrefix)
		}
		return errors.NewResourceExpired(expired)
	}
	return err
}

func handleCompactedErrorForPaging(continueKey, keyPrefix string) error {
	// continueToken.ResourceVersion=-1 means that the apiserver can
	// continue the list at the latest resource version. We don't use rv=0
	// for this purpose to distinguish from a bad token that has empty rv.
	newToken, err := storage.EncodeContinue(continueKey, keyPrefix, -1)
	if err != nil {
		utilruntime.HandleError(err)
		return errors.NewResourceExpired(continueExpired)
	}
	statusError := errors.NewResourceExpired(inconsistentContinue)
	statusError.ErrStatus.ListMeta.Continue = newToken
	return statusError
}
82
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/event.go
generated
vendored
@@ -1,82 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"fmt"

	"go.etcd.io/etcd/api/v3/mvccpb"
	clientv3 "go.etcd.io/etcd/client/v3"
)

type event struct {
	key              string
	value            []byte
	prevValue        []byte
	rev              int64
	isDeleted        bool
	isCreated        bool
	isProgressNotify bool
	// isInitialEventsEndBookmark helps us keep track
	// of whether we have sent an annotated bookmark event.
	//
	// when this variable is set to true,
	// a special annotation will be added
	// to the bookmark event.
	//
	// note that we decided to extend the event
	// struct field to eliminate contention
	// between startWatching and processEvent
	isInitialEventsEndBookmark bool
}

// parseKV converts a KeyValue retrieved from an initial sync() listing to a synthetic isCreated event.
func parseKV(kv *mvccpb.KeyValue) *event {
	return &event{
		key:       string(kv.Key),
		value:     kv.Value,
		prevValue: nil,
		rev:       kv.ModRevision,
		isDeleted: false,
		isCreated: true,
	}
}

func parseEvent(e *clientv3.Event) (*event, error) {
	if !e.IsCreate() && e.PrevKv == nil {
		// If the previous value is nil, error. One example of how this is possible is if the previous value has been compacted already.
		return nil, fmt.Errorf("etcd event received with PrevKv=nil (key=%q, modRevision=%d, type=%s)", string(e.Kv.Key), e.Kv.ModRevision, e.Type.String())
	}
	ret := &event{
		key:       string(e.Kv.Key),
		value:     e.Kv.Value,
		rev:       e.Kv.ModRevision,
		isDeleted: e.Type == clientv3.EventTypeDelete,
		isCreated: e.IsCreate(),
	}
	if e.PrevKv != nil {
		ret.prevValue = e.PrevKv.Value
	}
	return ret, nil
}

func progressNotifyEvent(rev int64) *event {
	return &event{
		rev:              rev,
		isProgressNotify: true,
	}
}
41
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/healthcheck.go
generated
vendored
@@ -1,41 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"encoding/json"
	"fmt"
)

// etcdHealth encodes data returned from etcd /healthz handler.
type etcdHealth struct {
	// Note this has to be public so the json library can modify it.
	Health string `json:"health"`
}

// EtcdHealthCheck decodes data returned from etcd /healthz handler.
// Deprecated: Validate health by passing storagebackend.Config directly to storagefactory.CreateProber.
func EtcdHealthCheck(data []byte) error {
	obj := etcdHealth{}
	if err := json.Unmarshal(data, &obj); err != nil {
		return err
	}
	if obj.Health != "true" {
		return fmt.Errorf("Unhealthy status: %s", obj.Health)
	}
	return nil
}
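A minimal usage sketch; the sample payload is illustrative of the JSON shape etcd's health endpoint returns:

	body := []byte(`{"health":"true"}`)
	if err := EtcdHealthCheck(body); err != nil {
		klog.Errorf("etcd reported unhealthy: %v", err)
	}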
107
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/latency_tracker.go
generated
vendored
@@ -1,107 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
	endpointsrequest "k8s.io/apiserver/pkg/endpoints/request"
)

// NewETCDLatencyTracker returns an implementation of
// clientv3.KV that times the calls from the specified
// 'delegate' KV instance in order to track latency incurred.
func NewETCDLatencyTracker(delegate clientv3.KV) clientv3.KV {
	return &clientV3KVLatencyTracker{KV: delegate}
}

// clientV3KVLatencyTracker decorates a clientv3.KV instance and times
// each call so we can track the latency an API request incurs in etcd
// round trips (the time it takes to send data to etcd and get the
// complete response back).
//
// If an API request involves N (N>=1) round trips to etcd, we sum
// up the latency incurred in each round trip.
//
// It uses the context associated with the request in flight, so no
// state is shared among the requests in flight, and so there is no
// concurrency overhead.
// If the goroutine executing the request handler makes concurrent calls
// to the underlying storage layer, that is protected since the latency
// tracking function TrackStorageLatency is thread safe.
//
// NOTE: Compact is an asynchronous process and is not associated with
// any request, so we will not be tracking its latency.
type clientV3KVLatencyTracker struct {
	clientv3.KV
}

func (c *clientV3KVLatencyTracker) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (*clientv3.PutResponse, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackStorageLatency(ctx, time.Since(startedAt))
	}()

	return c.KV.Put(ctx, key, val, opts...)
}

func (c *clientV3KVLatencyTracker) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackStorageLatency(ctx, time.Since(startedAt))
	}()

	return c.KV.Get(ctx, key, opts...)
}

func (c *clientV3KVLatencyTracker) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.DeleteResponse, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackStorageLatency(ctx, time.Since(startedAt))
	}()

	return c.KV.Delete(ctx, key, opts...)
}

func (c *clientV3KVLatencyTracker) Do(ctx context.Context, op clientv3.Op) (clientv3.OpResponse, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackStorageLatency(ctx, time.Since(startedAt))
	}()

	return c.KV.Do(ctx, op)
}

func (c *clientV3KVLatencyTracker) Txn(ctx context.Context) clientv3.Txn {
	return &clientV3TxnTracker{ctx: ctx, Txn: c.KV.Txn(ctx)}
}

type clientV3TxnTracker struct {
	ctx context.Context
	clientv3.Txn
}

func (t *clientV3TxnTracker) Commit() (*clientv3.TxnResponse, error) {
	startedAt := time.Now()
	defer func() {
		endpointsrequest.TrackStorageLatency(t.ctx, time.Since(startedAt))
	}()

	return t.Txn.Commit()
}
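A hedged wiring sketch; the client construction is assumed, and only the decoration step is taken from this file:

	// Route all KV round trips through the tracker so their latency is
	// attributed to the API request carried by ctx.
	cli.KV = NewETCDLatencyTracker(cli.KV)
	resp, err := cli.KV.Get(ctx, "/registry/pods/default/foo")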
131
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/lease_manager.go
generated
vendored
@@ -1,131 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"sync"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
	"k8s.io/apiserver/pkg/storage/etcd3/metrics"
)

const (
	defaultLeaseReuseDurationSeconds = 60
	defaultLeaseMaxObjectCount       = 1000
)

// LeaseManagerConfig is configuration for creating a lease manager.
type LeaseManagerConfig struct {
	// ReuseDurationSeconds specifies the time in seconds that each lease is reused
	ReuseDurationSeconds int64
	// MaxObjectCount specifies how many objects a lease can attach
	MaxObjectCount int64
}

// NewDefaultLeaseManagerConfig creates a LeaseManagerConfig with default values
func NewDefaultLeaseManagerConfig() LeaseManagerConfig {
	return LeaseManagerConfig{
		ReuseDurationSeconds: defaultLeaseReuseDurationSeconds,
		MaxObjectCount:       defaultLeaseMaxObjectCount,
	}
}

// leaseManager is used to manage leases requested from etcd. If a new write
// needs a lease that has a similar expiration time to the previous one, the old
// lease will be reused to reduce the overhead on etcd, since lease operations
// are expensive. In the implementation, we only store one previous lease,
// since all the events have the same ttl.
type leaseManager struct {
	client                  *clientv3.Client // etcd client used to grant leases
	leaseMu                 sync.Mutex
	prevLeaseID             clientv3.LeaseID
	prevLeaseExpirationTime time.Time
	// The period of time in seconds and percent of TTL that each lease is
	// reused. The minimum of them is used to avoid unreasonably large
	// numbers.
	leaseReuseDurationSeconds   int64
	leaseReuseDurationPercent   float64
	leaseMaxAttachedObjectCount int64
	leaseAttachedObjectCount    int64
}

// newDefaultLeaseManager creates a new lease manager using default settings.
func newDefaultLeaseManager(client *clientv3.Client, config LeaseManagerConfig) *leaseManager {
	if config.MaxObjectCount <= 0 {
		config.MaxObjectCount = defaultLeaseMaxObjectCount
	}
	return newLeaseManager(client, config.ReuseDurationSeconds, 0.05, config.MaxObjectCount)
}

// newLeaseManager creates a new lease manager with the number of buffered
// leases, lease reuse duration in seconds and percentage. The percentage
// value x means x*100%.
func newLeaseManager(client *clientv3.Client, leaseReuseDurationSeconds int64, leaseReuseDurationPercent float64, maxObjectCount int64) *leaseManager {
	return &leaseManager{
		client:                      client,
		leaseReuseDurationSeconds:   leaseReuseDurationSeconds,
		leaseReuseDurationPercent:   leaseReuseDurationPercent,
		leaseMaxAttachedObjectCount: maxObjectCount,
	}
}

// GetLease returns a lease based on the requested ttl: if the cached previous
// lease can be reused, it is reused; otherwise a new one is requested from etcd.
func (l *leaseManager) GetLease(ctx context.Context, ttl int64) (clientv3.LeaseID, error) {
	now := time.Now()
	l.leaseMu.Lock()
	defer l.leaseMu.Unlock()
	// check if previous lease can be reused
	reuseDurationSeconds := l.getReuseDurationSecondsLocked(ttl)
	valid := now.Add(time.Duration(ttl) * time.Second).Before(l.prevLeaseExpirationTime)
	sufficient := now.Add(time.Duration(ttl+reuseDurationSeconds) * time.Second).After(l.prevLeaseExpirationTime)

	// We count all operations that happened in the same lease, regardless of success or failure.
	// Currently each GetLease call attaches only 1 object.
	l.leaseAttachedObjectCount++

	if valid && sufficient && l.leaseAttachedObjectCount <= l.leaseMaxAttachedObjectCount {
		return l.prevLeaseID, nil
	}

	// request a lease with a little extra ttl from etcd
	ttl += reuseDurationSeconds
	lcr, err := l.client.Lease.Grant(ctx, ttl)
	if err != nil {
		return clientv3.LeaseID(0), err
	}
	// cache the new lease id
	l.prevLeaseID = lcr.ID
	l.prevLeaseExpirationTime = now.Add(time.Duration(ttl) * time.Second)
	// refresh count
	metrics.UpdateLeaseObjectCount(l.leaseAttachedObjectCount)
	l.leaseAttachedObjectCount = 1
	return lcr.ID, nil
}

// getReuseDurationSecondsLocked returns the reusable duration in seconds
// based on the configuration. The lock has to be acquired before calling this
// function.
func (l *leaseManager) getReuseDurationSecondsLocked(ttl int64) int64 {
	reuseDurationSeconds := int64(l.leaseReuseDurationPercent * float64(ttl))
	if reuseDurationSeconds > l.leaseReuseDurationSeconds {
		reuseDurationSeconds = l.leaseReuseDurationSeconds
	}
	return reuseDurationSeconds
}
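A hedged sketch of attaching a write to a managed lease; the client, key, and payload are illustrative. With ttl=3600 and the defaults above, the reusable window is min(0.05*3600, 60) = 60s, so the granted lease ttl is 3660s and subsequent writes reuse it while it still covers their ttl:

	mgr := newDefaultLeaseManager(cli, NewDefaultLeaseManagerConfig())
	leaseID, err := mgr.GetLease(ctx, 3600) // ttl in seconds
	if err != nil {
		return err
	}
	_, err = cli.Put(ctx, "/registry/events/default/foo", string(payload), clientv3.WithLease(leaseID))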
90
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/logger.go
generated
vendored
@@ -1,90 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"fmt"

	"google.golang.org/grpc/grpclog"
	"k8s.io/klog/v2"
)

func init() {
	grpclog.SetLoggerV2(klogWrapper{})
}

type klogWrapper struct{}

const klogWrapperDepth = 4

func (klogWrapper) Info(args ...interface{}) {
	if klogV := klog.V(5); klogV.Enabled() {
		klogV.InfoSDepth(klogWrapperDepth, fmt.Sprint(args...))
	}
}

func (klogWrapper) Infoln(args ...interface{}) {
	if klogV := klog.V(5); klogV.Enabled() {
		klogV.InfoSDepth(klogWrapperDepth, fmt.Sprintln(args...))
	}
}

func (klogWrapper) Infof(format string, args ...interface{}) {
	if klogV := klog.V(5); klogV.Enabled() {
		klog.V(5).InfoSDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
	}
}

func (klogWrapper) Warning(args ...interface{}) {
	klog.WarningDepth(klogWrapperDepth, args...)
}

func (klogWrapper) Warningln(args ...interface{}) {
	klog.WarningDepth(klogWrapperDepth, fmt.Sprintln(args...))
}

func (klogWrapper) Warningf(format string, args ...interface{}) {
	klog.WarningDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
}

func (klogWrapper) Error(args ...interface{}) {
	klog.ErrorDepth(klogWrapperDepth, args...)
}

func (klogWrapper) Errorln(args ...interface{}) {
	klog.ErrorDepth(klogWrapperDepth, fmt.Sprintln(args...))
}

func (klogWrapper) Errorf(format string, args ...interface{}) {
	klog.ErrorDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
}

func (klogWrapper) Fatal(args ...interface{}) {
	klog.FatalDepth(klogWrapperDepth, args...)
}

func (klogWrapper) Fatalln(args ...interface{}) {
	klog.FatalDepth(klogWrapperDepth, fmt.Sprintln(args...))
}

func (klogWrapper) Fatalf(format string, args ...interface{}) {
	klog.FatalDepth(klogWrapperDepth, fmt.Sprintf(format, args...))
}

func (klogWrapper) V(l int) bool {
	return bool(klog.V(klog.Level(l)).Enabled())
}
4
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/metrics/OWNERS
generated
vendored
@@ -1,4 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
  - logicalhan
309
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/metrics/metrics.go
generated
vendored
@@ -1,309 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"context"
	"fmt"
	"sync"
	"time"

	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/klog/v2"
)

/*
 * By default, all the following metrics are defined as falling under the
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	etcdRequestLatency = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name: "etcd_request_duration_seconds",
			Help: "Etcd request latency in seconds for each operation and object type.",
			// Etcd request latency in seconds for each operation and object type.
			// This metric is used for verifying etcd api call latencies SLO;
			// keep consistent with apiserver metric 'requestLatencies' in
			// staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
			Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	etcdRequestCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "etcd_requests_total",
			Help:           "Etcd request counts for each operation and object type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	etcdRequestErrorCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "etcd_request_errors_total",
			Help:           "Etcd failed request counts for each operation and object type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	objectCounts = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Name:           "apiserver_storage_objects",
			Help:           "Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"resource"},
	)
	dbTotalSize = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:         "apiserver",
			Name:              "storage_db_total_size_in_bytes",
			Help:              "Total size of the storage database file physically allocated in bytes.",
			StabilityLevel:    compbasemetrics.ALPHA,
			DeprecatedVersion: "1.28.0",
		},
		[]string{"endpoint"},
	)
	storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"storage_cluster_id"}, nil, compbasemetrics.STABLE, "")
	storageMonitor         = &monitorCollector{monitorGetter: func() ([]Monitor, error) { return nil, nil }}
	etcdEventsReceivedCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      "apiserver",
			Name:           "storage_events_received_total",
			Help:           "Number of etcd events received split by kind.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	etcdBookmarkCounts = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Name:           "etcd_bookmark_counts",
			Help:           "Number of etcd bookmarks (progress notify events) split by kind.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	etcdLeaseObjectCounts = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name:           "etcd_lease_object_counts",
			Help:           "Number of objects attached to a single etcd lease.",
			Buckets:        []float64{10, 50, 100, 500, 1000, 2500, 5000},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{},
	)
	listStorageCount = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_total",
			Help:           "Number of LIST requests served from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	listStorageNumFetched = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_fetched_objects_total",
			Help:           "Number of objects read from storage in the course of serving a LIST request",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	listStorageNumSelectorEvals = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_evaluated_objects_total",
			Help:           "Number of objects tested in the course of serving a LIST request from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	listStorageNumReturned = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_returned_objects_total",
			Help:           "Number of objects returned for a LIST request from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	decodeErrorCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      "apiserver",
			Name:           "storage_decode_errors_total",
			Help:           "Number of stored object decode errors split by object type",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
)

var registerMetrics sync.Once

// Register all metrics.
func Register() {
	// Register the metrics.
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(etcdRequestLatency)
		legacyregistry.MustRegister(etcdRequestCounts)
		legacyregistry.MustRegister(etcdRequestErrorCounts)
		legacyregistry.MustRegister(objectCounts)
		legacyregistry.MustRegister(dbTotalSize)
		legacyregistry.CustomMustRegister(storageMonitor)
		legacyregistry.MustRegister(etcdEventsReceivedCounts)
		legacyregistry.MustRegister(etcdBookmarkCounts)
		legacyregistry.MustRegister(etcdLeaseObjectCounts)
		legacyregistry.MustRegister(listStorageCount)
		legacyregistry.MustRegister(listStorageNumFetched)
		legacyregistry.MustRegister(listStorageNumSelectorEvals)
		legacyregistry.MustRegister(listStorageNumReturned)
		legacyregistry.MustRegister(decodeErrorCounts)
	})
}

// UpdateObjectCount sets the apiserver_storage_objects metric.
func UpdateObjectCount(resourcePrefix string, count int64) {
	objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
}

// RecordEtcdRequest updates the etcd_request_duration_seconds,
// etcd_requests_total, and etcd_request_errors_total metrics.
func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) {
	v := []string{verb, resource}
	etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime))
	etcdRequestCounts.WithLabelValues(v...).Inc()
	if err != nil {
		etcdRequestErrorCounts.WithLabelValues(v...).Inc()
	}
}

// RecordEtcdEvent updates the apiserver_storage_events_received_total metric.
func RecordEtcdEvent(resource string) {
	etcdEventsReceivedCounts.WithLabelValues(resource).Inc()
}

// RecordEtcdBookmark updates the etcd_bookmark_counts metric.
func RecordEtcdBookmark(resource string) {
	etcdBookmarkCounts.WithLabelValues(resource).Inc()
}

// RecordDecodeError updates the apiserver_storage_decode_errors_total metric.
func RecordDecodeError(resource string) {
	decodeErrorCounts.WithLabelValues(resource).Inc()
}

// Reset resets the etcd_request_duration_seconds metric.
func Reset() {
	etcdRequestLatency.Reset()
}

// sinceInSeconds gets the time since the specified start in seconds.
//
// This is a variable to facilitate testing.
var sinceInSeconds = func(start time.Time) float64 {
	return time.Since(start).Seconds()
}

// UpdateEtcdDbSize sets the apiserver_storage_db_total_size_in_bytes metric.
// Deprecated: this metric will be replaced with apiserver_storage_size_bytes
func UpdateEtcdDbSize(ep string, size int64) {
	dbTotalSize.WithLabelValues(ep).Set(float64(size))
}

// SetStorageMonitorGetter sets the monitor getter to allow monitoring etcd stats.
func SetStorageMonitorGetter(getter func() ([]Monitor, error)) {
	storageMonitor.setGetter(getter)
}

// UpdateLeaseObjectCount sets the etcd_lease_object_counts metric.
func UpdateLeaseObjectCount(count int64) {
	// Currently we only store one previous lease, since all the events have the same ttl.
	// See pkg/storage/etcd3/lease_manager.go
	etcdLeaseObjectCounts.WithLabelValues().Observe(float64(count))
}

// RecordStorageListMetrics records various metrics of the cost to serve a LIST request.
func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) {
	listStorageCount.WithLabelValues(resource).Inc()
	listStorageNumFetched.WithLabelValues(resource).Add(float64(numFetched))
	listStorageNumSelectorEvals.WithLabelValues(resource).Add(float64(numEvald))
	listStorageNumReturned.WithLabelValues(resource).Add(float64(numReturned))
}

type Monitor interface {
	Monitor(ctx context.Context) (StorageMetrics, error)
	Close() error
}

type StorageMetrics struct {
	Size int64
}

type monitorCollector struct {
	compbasemetrics.BaseStableCollector

	mutex         sync.Mutex
	monitorGetter func() ([]Monitor, error)
}

func (m *monitorCollector) setGetter(monitorGetter func() ([]Monitor, error)) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.monitorGetter = monitorGetter
}

func (m *monitorCollector) getGetter() func() ([]Monitor, error) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	return m.monitorGetter
}

// DescribeWithStability implements compbasemetrics.StableCollector
func (c *monitorCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) {
	ch <- storageSizeDescription
}

// CollectWithStability implements compbasemetrics.StableCollector
func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) {
	monitors, err := c.getGetter()()
	if err != nil {
		return
	}

	for i, m := range monitors {
		storageClusterID := fmt.Sprintf("etcd-%d", i)

		klog.V(4).InfoS("Start collecting storage metrics", "storage_cluster_id", storageClusterID)
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		metrics, err := m.Monitor(ctx)
		cancel()
		m.Close()
		if err != nil {
			klog.InfoS("Failed to get storage metrics", "storage_cluster_id", storageClusterID, "err", err)
			continue
		}

		metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), storageClusterID)
		if err != nil {
			klog.ErrorS(err, "Failed to create metric", "storage_cluster_id", storageClusterID)
		}
		ch <- metric
	}
}
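A hedged usage sketch; the caller, verb, and resource labels are illustrative:

	metrics.Register() // idempotent; registration is guarded by sync.Once
	start := time.Now()
	_, err := cli.Get(ctx, "/registry/pods/default/foo")
	// Records latency, the request count, and (on failure) the error count.
	metrics.RecordEtcdRequest("get", "pods", err, start)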
1084
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/store.go
generated
vendored
File diff suppressed because it is too large
748
e2e/vendor/k8s.io/apiserver/pkg/storage/etcd3/watcher.go
generated
vendored
@@ -1,748 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd3

import (
	"context"
	"errors"
	"fmt"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
	grpccodes "google.golang.org/grpc/codes"
	grpcstatus "google.golang.org/grpc/status"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/apiserver/pkg/features"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/etcd3/metrics"
	"k8s.io/apiserver/pkg/storage/value"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	utilflowcontrol "k8s.io/apiserver/pkg/util/flowcontrol"
	"k8s.io/klog/v2"
)

const (
	// We set a buffer in order to reduce the number of context switches.
	incomingBufSize         = 100
	outgoingBufSize         = 100
	processEventConcurrency = 10
)

// defaultWatcherMaxLimit is used to facilitate construction tests
var defaultWatcherMaxLimit int64 = maxLimit

// fatalOnDecodeError is used during testing to panic the server if the watcher encounters a decoding error
var fatalOnDecodeError = false

func init() {
	// check to see if we are running in a test environment
	TestOnlySetFatalOnDecodeError(true)
	fatalOnDecodeError, _ = strconv.ParseBool(os.Getenv("KUBE_PANIC_WATCH_DECODE_ERROR"))
}

// TestOnlySetFatalOnDecodeError should only be used for cases where decode errors are expected and need to be tested, e.g. conversion webhooks.
func TestOnlySetFatalOnDecodeError(b bool) {
	fatalOnDecodeError = b
}

type watcher struct {
	client              *clientv3.Client
	codec               runtime.Codec
	newFunc             func() runtime.Object
	objectType          string
	groupResource       schema.GroupResource
	versioner           storage.Versioner
	transformer         value.Transformer
	getCurrentStorageRV func(context.Context) (uint64, error)
}

// watchChan implements watch.Interface.
type watchChan struct {
	watcher           *watcher
	key               string
	initialRev        int64
	recursive         bool
	progressNotify    bool
	internalPred      storage.SelectionPredicate
	ctx               context.Context
	cancel            context.CancelFunc
	incomingEventChan chan *event
	resultChan        chan watch.Event
	errChan           chan error
}

// Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
// If rev is zero, it will return the existing object(s) and then start watching from
// the maximum revision+1 of the returned objects.
// If rev is non-zero, it will watch events that happened after the given revision.
// If opts.Recursive is false, it watches on the given key.
// If opts.Recursive is true, it watches any children and directories under the key, excluding the root key itself.
// pred must be non-nil. Only changes matching opts.Predicate will be returned.
func (w *watcher) Watch(ctx context.Context, key string, rev int64, opts storage.ListOptions) (watch.Interface, error) {
	if opts.Recursive && !strings.HasSuffix(key, "/") {
		key += "/"
	}
	if opts.ProgressNotify && w.newFunc == nil {
		return nil, apierrors.NewInternalError(errors.New("progressNotify for watch is unsupported by the etcd storage because no newFunc was provided"))
	}
	startWatchRV, err := w.getStartWatchResourceVersion(ctx, rev, opts)
	if err != nil {
		return nil, err
	}
	wc := w.createWatchChan(ctx, key, startWatchRV, opts.Recursive, opts.ProgressNotify, opts.Predicate)
	go wc.run(isInitialEventsEndBookmarkRequired(opts), areInitialEventsRequired(rev, opts))

	// For etcd watch we don't have an easy way to answer whether the watch
	// has already caught up. So in the initial version (given that watchcache
	// is by default enabled for all resources but Events), we just deliver
	// the initialization signal immediately. Improving this will be explored
	// in the future.
	utilflowcontrol.WatchInitialized(ctx)

	return wc, nil
}

func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive, progressNotify bool, pred storage.SelectionPredicate) *watchChan {
	wc := &watchChan{
		watcher:           w,
		key:               key,
		initialRev:        rev,
		recursive:         recursive,
		progressNotify:    progressNotify,
		internalPred:      pred,
		incomingEventChan: make(chan *event, incomingBufSize),
		resultChan:        make(chan watch.Event, outgoingBufSize),
		errChan:           make(chan error, 1),
	}
	if pred.Empty() {
		// The filter doesn't filter out any object.
		wc.internalPred = storage.Everything
	}
	wc.ctx, wc.cancel = context.WithCancel(ctx)
	return wc
}

// getStartWatchResourceVersion returns a ResourceVersion
// the watch will be started from.
// Depending on the input parameters the semantics of the returned ResourceVersion are:
//   - start at Exact (return resourceVersion)
//   - start at Most Recent (return an RV from etcd)
func (w *watcher) getStartWatchResourceVersion(ctx context.Context, resourceVersion int64, opts storage.ListOptions) (int64, error) {
	if resourceVersion > 0 {
		return resourceVersion, nil
	}
	if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) {
		return 0, nil
	}
	if opts.SendInitialEvents == nil || *opts.SendInitialEvents {
		// note that when opts.SendInitialEvents=true
		// we will be issuing a consistent LIST request
		// against etcd followed by the special bookmark event
		return 0, nil
	}
	// at this point the client is interested
	// only in getting a stream of events
	// starting at the MostRecent point in time (RV)
	currentStorageRV, err := w.getCurrentStorageRV(ctx)
	if err != nil {
		return 0, err
	}
	// currentStorageRV is taken from resp.Header.Revision (int64)
	// and cast to uint64, so it is safe to do the reverse.
	// At some point we should unify the interface, but that
	// would require changing Versioner.UpdateList.
	return int64(currentStorageRV), nil
}

// isInitialEventsEndBookmarkRequired: since there is no way to directly set
// opts.ProgressNotify from the API and the etcd3 impl doesn't support
// notification for external clients, we simply return initialEventsEndBookmarkRequired
// to only send the bookmark event after the initial list call.
//
// see: https://github.com/kubernetes/kubernetes/issues/120348
func isInitialEventsEndBookmarkRequired(opts storage.ListOptions) bool {
	if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) {
		return false
	}
	return opts.SendInitialEvents != nil && *opts.SendInitialEvents && opts.Predicate.AllowWatchBookmarks
}

// areInitialEventsRequired returns true if all events from etcd should be returned.
func areInitialEventsRequired(resourceVersion int64, opts storage.ListOptions) bool {
	if opts.SendInitialEvents == nil && resourceVersion == 0 {
		return true // legacy case
	}
	if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) {
		return false
	}
	return opts.SendInitialEvents != nil && *opts.SendInitialEvents
}

type etcdError interface {
	Code() grpccodes.Code
	Error() string
}

type grpcError interface {
	GRPCStatus() *grpcstatus.Status
}

func isCancelError(err error) bool {
	if err == nil {
		return false
	}
	if err == context.Canceled {
		return true
	}
	if etcdErr, ok := err.(etcdError); ok && etcdErr.Code() == grpccodes.Canceled {
		return true
	}
	if grpcErr, ok := err.(grpcError); ok && grpcErr.GRPCStatus().Code() == grpccodes.Canceled {
		return true
	}
	return false
}

func (wc *watchChan) run(initialEventsEndBookmarkRequired, forceInitialEvents bool) {
	watchClosedCh := make(chan struct{})
	go wc.startWatching(watchClosedCh, initialEventsEndBookmarkRequired, forceInitialEvents)

	var resultChanWG sync.WaitGroup
	wc.processEvents(&resultChanWG)

	select {
	case err := <-wc.errChan:
		if isCancelError(err) {
			break
		}
		errResult := transformErrorToEvent(err)
		if errResult != nil {
			// error result is guaranteed to be received by the user before closing ResultChan.
			select {
			case wc.resultChan <- *errResult:
			case <-wc.ctx.Done(): // user has given up all results
			}
		}
	case <-watchClosedCh:
	case <-wc.ctx.Done(): // user cancel
	}

	// We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all.
	// It's fine to double cancel.
	wc.cancel()

	// we need to wait until resultChan is no longer used
	resultChanWG.Wait()
	close(wc.resultChan)
}

func (wc *watchChan) Stop() {
	wc.cancel()
}

func (wc *watchChan) ResultChan() <-chan watch.Event {
	return wc.resultChan
}

func (wc *watchChan) RequestWatchProgress() error {
	return wc.watcher.client.RequestProgress(wc.ctx)
}

// sync tries to retrieve existing data and send it to process.
// The revision to watch will be set to the revision in the response.
// All events sent will have isCreated=true.
func (wc *watchChan) sync() error {
	opts := []clientv3.OpOption{}
	if wc.recursive {
		opts = append(opts, clientv3.WithLimit(defaultWatcherMaxLimit))
		rangeEnd := clientv3.GetPrefixRangeEnd(wc.key)
		opts = append(opts, clientv3.WithRange(rangeEnd))
	}

	var err error
	var lastKey []byte
	var withRev int64
	var getResp *clientv3.GetResponse

	metricsOp := "get"
	if wc.recursive {
		metricsOp = "list"
	}

	preparedKey := wc.key

	for {
		startTime := time.Now()
		getResp, err = wc.watcher.client.KV.Get(wc.ctx, preparedKey, opts...)
		metrics.RecordEtcdRequest(metricsOp, wc.watcher.groupResource.String(), err, startTime)
		if err != nil {
			return interpretListError(err, true, preparedKey, wc.key)
		}

		if len(getResp.Kvs) == 0 && getResp.More {
			return fmt.Errorf("no results were found, but etcd indicated there were more values remaining")
		}

		// send items from the response until no more results
		for i, kv := range getResp.Kvs {
			lastKey = kv.Key
			wc.sendEvent(parseKV(kv))
			// free kv early. Long lists can take O(seconds) to decode.
			getResp.Kvs[i] = nil
		}

		if withRev == 0 {
			wc.initialRev = getResp.Header.Revision
		}

		// no more results remain
		if !getResp.More {
			return nil
		}

		preparedKey = string(lastKey) + "\x00"
		if withRev == 0 {
			withRev = getResp.Header.Revision
			opts = append(opts, clientv3.WithRev(withRev))
		}
	}
}

func logWatchChannelErr(err error) {
	switch {
	case strings.Contains(err.Error(), "mvcc: required revision has been compacted"):
		// mvcc revision compaction which is regarded as warning, not error
		klog.Warningf("watch chan error: %v", err)
	case isCancelError(err):
		// expected when watches close, no need to log
	default:
		klog.Errorf("watch chan error: %v", err)
	}
}

// startWatching does:
//   - get current objects if initialRev=0; set initialRev to the current rev
//   - watch on the given key and send events to process.
//
// initialEventsEndBookmarkSent helps us keep track
// of whether we have sent an annotated bookmark event.
//
// it's important to note that we don't
// need to track the actual RV because
// we only send the bookmark event
// after the initial list call.
//
// when this variable is set to false,
// it means we don't have any specific
// preferences for delivering bookmark events.
func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEndBookmarkRequired, forceInitialEvents bool) {
	if wc.initialRev > 0 && forceInitialEvents {
		currentStorageRV, err := wc.watcher.getCurrentStorageRV(wc.ctx)
		if err != nil {
			wc.sendError(err)
			return
		}
		if uint64(wc.initialRev) > currentStorageRV {
			wc.sendError(storage.NewTooLargeResourceVersionError(uint64(wc.initialRev), currentStorageRV, int(wait.Jitter(1*time.Second, 3).Seconds())))
			return
		}
	}
	if forceInitialEvents {
		if err := wc.sync(); err != nil {
			klog.Errorf("failed to sync with latest state: %v", err)
			wc.sendError(err)
			return
		}
	}
	if initialEventsEndBookmarkRequired {
		wc.sendEvent(func() *event {
			e := progressNotifyEvent(wc.initialRev)
			e.isInitialEventsEndBookmark = true
			return e
		}())
	}
	opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1), clientv3.WithPrevKV()}
	if wc.recursive {
		opts = append(opts, clientv3.WithPrefix())
	}
	if wc.progressNotify {
		opts = append(opts, clientv3.WithProgressNotify())
	}
	wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
	for wres := range wch {
		if wres.Err() != nil {
			err := wres.Err()
			// If there is an error on the server (e.g. compaction), the channel will return it before being closed.
			logWatchChannelErr(err)
			wc.sendError(err)
			return
		}
		if wres.IsProgressNotify() {
			wc.sendEvent(progressNotifyEvent(wres.Header.GetRevision()))
			metrics.RecordEtcdBookmark(wc.watcher.groupResource.String())
			continue
		}

		for _, e := range wres.Events {
			metrics.RecordEtcdEvent(wc.watcher.groupResource.String())
			parsedEvent, err := parseEvent(e)
			if err != nil {
				logWatchChannelErr(err)
				wc.sendError(err)
				return
			}
			wc.sendEvent(parsedEvent)
		}
	}
	// When we come to this point, it's only possible that the client side ended the watch,
	// e.g. cancelled the context or closed the client.
	// If this watch chan is broken and the context isn't cancelled, other goroutines will still hang.
	// We should notify the main thread that this goroutine has exited.
	close(watchClosedCh)
}

// processEvents processes events from the etcd watcher and sends results to resultChan.
func (wc *watchChan) processEvents(wg *sync.WaitGroup) {
	if utilfeature.DefaultFeatureGate.Enabled(features.ConcurrentWatchObjectDecode) {
		wc.concurrentProcessEvents(wg)
	} else {
		wg.Add(1)
		go wc.serialProcessEvents(wg)
	}
}

func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case e := <-wc.incomingEventChan:
			res, err := wc.transform(e)
			if err != nil {
				wc.sendError(err)
				return
			}

			if res == nil {
				continue
			}
			if len(wc.resultChan) == cap(wc.resultChan) {
				klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
			}
			// If the user can't receive results fast enough, we also block incoming events from the watcher,
			// because storing events locally would cause more memory usage.
			// The worst case would be closing the fast watcher.
			select {
			case wc.resultChan <- *res:
			case <-wc.ctx.Done():
				return
			}
		case <-wc.ctx.Done():
			return
		}
	}
}

func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) {
	p := concurrentOrderedEventProcessing{
		wc:              wc,
		processingQueue: make(chan chan *processingResult, processEventConcurrency-1),

		objectType:    wc.watcher.objectType,
		groupResource: wc.watcher.groupResource,
	}
	wg.Add(1)
	go func() {
		defer wg.Done()
		p.scheduleEventProcessing(wc.ctx, wg)
	}()
	wg.Add(1)
	go func() {
		defer wg.Done()
		p.collectEventProcessing(wc.ctx)
	}()
}

type processingResult struct {
	event *watch.Event
	err   error
}

type concurrentOrderedEventProcessing struct {
	wc *watchChan

	processingQueue chan chan *processingResult
	// Metadata for logging
	objectType    string
	groupResource schema.GroupResource
}

func (p *concurrentOrderedEventProcessing) scheduleEventProcessing(ctx context.Context, wg *sync.WaitGroup) {
	var e *event
	for {
		select {
		case <-ctx.Done():
			return
		case e = <-p.wc.incomingEventChan:
		}
		processingResponse := make(chan *processingResult, 1)
		select {
		case <-ctx.Done():
			return
		case p.processingQueue <- processingResponse:
		}
		wg.Add(1)
		go func(e *event, response chan<- *processingResult) {
			defer wg.Done()
			responseEvent, err := p.wc.transform(e)
			select {
			case <-ctx.Done():
			case response <- &processingResult{event: responseEvent, err: err}:
			}
		}(e, processingResponse)
	}
}

func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Context) {
	var processingResponse chan *processingResult
	var r *processingResult
	for {
		select {
		case <-ctx.Done():
			return
		case processingResponse = <-p.processingQueue:
		}
		select {
		case <-ctx.Done():
			return
		case r = <-processingResponse:
		}
		if r.err != nil {
			p.wc.sendError(r.err)
			return
		}
		if r.event == nil {
			continue
		}
		if len(p.wc.resultChan) == cap(p.wc.resultChan) {
			klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.wc.watcher.objectType, "groupResource", p.wc.watcher.groupResource)
		}
		// If the user can't receive results fast enough, we also block incoming events from the watcher,
		// because storing events locally would cause more memory usage.
		// The worst case would be closing the fast watcher.
		select {
		case p.wc.resultChan <- *r.event:
		case <-p.wc.ctx.Done():
			return
		}
	}
}

func (wc *watchChan) filter(obj runtime.Object) bool {
	if wc.internalPred.Empty() {
		return true
	}
	matched, err := wc.internalPred.Matches(obj)
	return err == nil && matched
}

func (wc *watchChan) acceptAll() bool {
	return wc.internalPred.Empty()
}

// transform transforms an event into a result for the user if not filtered.
func (wc *watchChan) transform(e *event) (res *watch.Event, err error) {
	curObj, oldObj, err := wc.prepareObjs(e)
	if err != nil {
		klog.Errorf("failed to prepare current and previous objects: %v", err)
		return nil, err
	}

	switch {
	case e.isProgressNotify:
		object := wc.watcher.newFunc()
		if err := wc.watcher.versioner.UpdateObject(object, uint64(e.rev)); err != nil {
			klog.Errorf("failed to propagate object version: %v", err)
			return nil, fmt.Errorf("failed to propagate object resource version: %w", err)
		}
		if e.isInitialEventsEndBookmark {
			if err := storage.AnnotateInitialEventsEndBookmark(object); err != nil {
				return nil, fmt.Errorf("error while accessing object's metadata gr: %v, type: %v, obj: %#v, err: %w", wc.watcher.groupResource, wc.watcher.objectType, object, err)
			}
		}
		res = &watch.Event{
			Type:   watch.Bookmark,
			Object: object,
		}
	case e.isDeleted:
		if !wc.filter(oldObj) {
			return nil, nil
		}
		res = &watch.Event{
			Type:   watch.Deleted,
			Object: oldObj,
		}
	case e.isCreated:
		if !wc.filter(curObj) {
			return nil, nil
		}
		res = &watch.Event{
			Type:   watch.Added,
			Object: curObj,
		}
	default:
		if wc.acceptAll() {
			res = &watch.Event{
				Type:   watch.Modified,
				Object: curObj,
			}
			return res, nil
		}
		curObjPasses := wc.filter(curObj)
		oldObjPasses := wc.filter(oldObj)
		switch {
		case curObjPasses && oldObjPasses:
			res = &watch.Event{
				Type:   watch.Modified,
				Object: curObj,
			}
		case curObjPasses && !oldObjPasses:
			res = &watch.Event{
				Type:   watch.Added,
				Object: curObj,
			}
		case !curObjPasses && oldObjPasses:
			res = &watch.Event{
				Type:   watch.Deleted,
				Object: oldObj,
			}
		}
	}
	return res, nil
}

func transformErrorToEvent(err error) *watch.Event {
	err = interpretWatchError(err)
	if _, ok := err.(apierrors.APIStatus); !ok {
		err = apierrors.NewInternalError(err)
	}
	status := err.(apierrors.APIStatus).Status()
	return &watch.Event{
		Type:   watch.Error,
		Object: &status,
	}
}

func (wc *watchChan) sendError(err error) {
	select {
	case wc.errChan <- err:
	case <-wc.ctx.Done():
	}
}

func (wc *watchChan) sendEvent(e *event) {
	if len(wc.incomingEventChan) == incomingBufSize {
		klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow decoding, user not receiving fast, or other processing logic", "incomingEvents", incomingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
	}
	select {
	case wc.incomingEventChan <- e:
	case <-wc.ctx.Done():
	}
}

func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtime.Object, err error) {
	if e.isProgressNotify {
		// progressNotify events contain neither the current nor the previous object version.
		return nil, nil, nil
	}

	if !e.isDeleted {
		data, _, err := wc.watcher.transformer.TransformFromStorage(wc.ctx, e.value, authenticatedDataString(e.key))
		if err != nil {
			return nil, nil, err
		}
		curObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
		if err != nil {
			return nil, nil, err
		}
	}
	// We need to decode prevValue only if this is a deletion event or
	// the underlying filter doesn't accept all objects (otherwise we
	// know that the filter for the previous object will return true and
	// we need the object only to compute whether it was filtered out
	// before).
	if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) {
		data, _, err := wc.watcher.transformer.TransformFromStorage(wc.ctx, e.prevValue, authenticatedDataString(e.key))
		if err != nil {
			return nil, nil, wc.watcher.transformIfCorruptObjectError(e, err)
		}
		// Note that this sends the *old* object with the etcd revision for the time at
		// which it gets deleted.
		oldObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
		if err != nil {
			return nil, nil, wc.watcher.transformIfCorruptObjectError(e, err)
		}
	}
	return curObj, oldObj, nil
}

type corruptObjectDeletedError struct {
	err error
}

func (e *corruptObjectDeletedError) Error() string {
	return fmt.Sprintf("saw a DELETED event, but object data is corrupt - %v", e.err)
}
func (e *corruptObjectDeletedError) Unwrap() error { return e.err }

func (w *watcher) transformIfCorruptObjectError(e *event, err error) error {
	var corruptObjErr *corruptObjectError
	if !e.isDeleted || !errors.As(err, &corruptObjErr) {
		return err
	}

	// if we are here it means we received a DELETED event but the object
	// associated with it is corrupt because we failed to transform or
	// decode the data associated with the object.
	// wrap the original error so we can send a proper watch Error event.
	return &corruptObjectDeletedError{err: corruptObjErr}
}

func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (_ runtime.Object, err error) {
	obj, err := runtime.Decode(codec, data)
	if err != nil {
		if fatalOnDecodeError {
			// we are running in a test environment and thus an
			// error here is due to a coder mistake if the defer
			// does not catch it
			panic(err)
		}
		return nil, err
	}
	// ensure resource version is set on the object we load from etcd
	if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
		return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
	}
	return obj, nil
}
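
Taken together, watchChan gives the caller a standard watch.Interface whose ResultChan is only closed after run() drains the processing goroutines. A minimal consumer sketch (the key, revision, handler and store variable below are illustrative, not taken from this diff):

// Sketch: consuming the watch.Interface returned by watcher.Watch.
w, err := store.Watch(ctx, "/registry/pods/", 0, storage.ListOptions{
	Recursive: true,               // watch all children under the key
	Predicate: storage.Everything, // no filtering
})
if err != nil {
	return err
}
defer w.Stop() // cancels wc.ctx; run() then closes ResultChan

for ev := range w.ResultChan() {
	switch ev.Type {
	case watch.Error:
		// transformErrorToEvent guarantees ev.Object is an API Status.
		return fmt.Errorf("watch failed: %v", ev.Object)
	case watch.Bookmark:
		// progress-notify bookmarks land here when requested
	default:
		handle(ev) // hypothetical handler for Added/Modified/Deleted
	}
}
return nil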
172
e2e/vendor/k8s.io/apiserver/pkg/storage/feature/feature_support_checker.go
generated
vendored
@ -1,172 +0,0 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package feature

import (
	"context"
	"fmt"
	"sync"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/version"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"
)

var (
	// Define these static versions to use for checking the version of etcd; see kubernetes issue #123192.
	v3_4_31 = version.MustParseSemantic("3.4.31")
	v3_5_0  = version.MustParseSemantic("3.5.0")
	v3_5_13 = version.MustParseSemantic("3.5.13")

	// DefaultFeatureSupportChecker is a shared global etcd FeatureSupportChecker.
	DefaultFeatureSupportChecker FeatureSupportChecker = newDefaultFeatureSupportChecker()
)

// FeatureSupportChecker defines the Supports functions.
type FeatureSupportChecker interface {
	// Supports checks if the feature is supported or not by checking the internal cache.
	// By default all calls to this function before calling CheckClient return false.
	// Returns true if all endpoints in the etcd clients support the feature.
	// If client A supports the feature and client B doesn't, `Supports` will
	// first return true at client A initialization and then return false on client B
	// initialization; it can flip the support at runtime.
	Supports(feature storage.Feature) bool
	// CheckClient works with an etcd client to recalculate feature support and cache it internally.
	// All etcd clients must support the feature for `Supports` to return true.
	// If client A supports the feature and client B doesn't, `Supports` will
	// first return true at client A initialization and then return false on client B
	// initialization; it can flip the support at runtime.
	CheckClient(ctx context.Context, c client, feature storage.Feature)
}

type defaultFeatureSupportChecker struct {
	lock                    sync.Mutex
	progressNotifySupported *bool
	checkingEndpoint        map[string]struct{}
}

func newDefaultFeatureSupportChecker() *defaultFeatureSupportChecker {
	return &defaultFeatureSupportChecker{
		checkingEndpoint: make(map[string]struct{}),
	}
}

// Supports can check the feature from anywhere without storage if it was cached before.
func (f *defaultFeatureSupportChecker) Supports(feature storage.Feature) bool {
	switch feature {
	case storage.RequestWatchProgress:
		f.lock.Lock()
		defer f.lock.Unlock()

		return ptr.Deref(f.progressNotifySupported, false)
	default:
		runtime.HandleError(fmt.Errorf("feature %q is not implemented in DefaultFeatureSupportChecker", feature))
		return false
	}
}

// CheckClient accepts a client, calculates the support per endpoint and caches it.
func (f *defaultFeatureSupportChecker) CheckClient(ctx context.Context, c client, feature storage.Feature) {
	switch feature {
	case storage.RequestWatchProgress:
		f.checkClient(ctx, c)
	default:
		runtime.HandleError(fmt.Errorf("feature %q is not implemented in DefaultFeatureSupportChecker", feature))
	}
}

func (f *defaultFeatureSupportChecker) checkClient(ctx context.Context, c client) {
	// start with 10 ms, multiply by 2 each step, until 15 s, then stay on 15 seconds.
	delayFunc := wait.Backoff{
		Duration: 10 * time.Millisecond,
		Cap:      15 * time.Second,
		Factor:   2.0,
		Steps:    11,
	}.DelayFunc()
	f.lock.Lock()
	defer f.lock.Unlock()
	for _, ep := range c.Endpoints() {
		if _, found := f.checkingEndpoint[ep]; found {
			continue
		}
		f.checkingEndpoint[ep] = struct{}{}
		go func(ep string) {
			defer runtime.HandleCrash()
			err := delayFunc.Until(ctx, true, true, func(ctx context.Context) (done bool, err error) {
				internalErr := f.clientSupportsRequestWatchProgress(ctx, c, ep)
				return internalErr == nil, nil
			})
			if err != nil {
				klog.ErrorS(err, "Failed to check if RequestWatchProgress is supported by etcd after retrying")
			}
		}(ep)
	}
}

func (f *defaultFeatureSupportChecker) clientSupportsRequestWatchProgress(ctx context.Context, c client, ep string) error {
	supported, err := endpointSupportsRequestWatchProgress(ctx, c, ep)
	if err != nil {
		return err
	}
	f.lock.Lock()
	defer f.lock.Unlock()

	if !supported {
		klog.Infof("RequestWatchProgress feature is not supported by %q endpoint", ep)
		f.progressNotifySupported = ptr.To(false)
		return nil
	}
	if f.progressNotifySupported == nil {
		f.progressNotifySupported = ptr.To(true)
	}
	return nil
}

// Sub interface of the etcd client.
type client interface {
	// Endpoints returns the list of endpoints in the etcd client.
	Endpoints() []string
	// Status retrieves the status information from the etcd client connected to the specified endpoint.
	// It takes a context.Context parameter for cancellation or timeout control.
	// It returns a clientv3.StatusResponse containing the status information or an error if the operation fails.
	Status(ctx context.Context, endpoint string) (*clientv3.StatusResponse, error)
}

// endpointSupportsRequestWatchProgress evaluates whether RequestWatchProgress is supported by the current version of the etcd endpoint.
// Based on these issues:
//   - https://github.com/etcd-io/etcd/issues/15220 - Fixed in etcd v3.4.25+ and v3.5.8+
//   - https://github.com/etcd-io/etcd/issues/17507 - Fixed in etcd v3.4.31+ and v3.5.13+
func endpointSupportsRequestWatchProgress(ctx context.Context, c client, endpoint string) (bool, error) {
	resp, err := c.Status(ctx, endpoint)
	if err != nil {
		return false, fmt.Errorf("failed checking etcd version, endpoint: %q: %w", endpoint, err)
	}
	ver, err := version.ParseSemantic(resp.Version)
	if err != nil {
		// Assume the feature is not supported if the etcd version cannot be parsed.
		klog.ErrorS(err, "Failed to parse etcd version", "version", resp.Version)
		return false, nil
	}
	if ver.LessThan(v3_4_31) || ver.AtLeast(v3_5_0) && ver.LessThan(v3_5_13) {
		return false, nil
	}
	return true, nil
}
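
The support matrix encoded in endpointSupportsRequestWatchProgress reads directly off the two issue fixes: a 3.4.x endpoint needs at least 3.4.31, a 3.5.x endpoint needs at least 3.5.13, and anything newer passes. A small sketch exercising that predicate with the same version package:

// Sketch: the version gate from endpointSupportsRequestWatchProgress,
// evaluated for a few representative etcd versions.
for _, tc := range []struct {
	v    string
	want bool
}{
	{"3.4.30", false}, // before the 3.4.31 fix
	{"3.4.31", true},
	{"3.5.12", false}, // a 3.5.x release before 3.5.13
	{"3.5.13", true},
	{"3.6.0", true},
} {
	ver := version.MustParseSemantic(tc.v)
	supported := !(ver.LessThan(v3_4_31) || ver.AtLeast(v3_5_0) && ver.LessThan(v3_5_13))
	fmt.Printf("%s: supported=%v (want %v)\n", tc.v, supported, tc.want)
}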
5
e2e/vendor/k8s.io/apiserver/pkg/storage/storagebackend/OWNERS
generated
vendored
@ -1,5 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

reviewers:
  - smarterclayton
  - wojtek-t
127
e2e/vendor/k8s.io/apiserver/pkg/storage/storagebackend/config.go
generated
vendored
@ -1,127 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package storagebackend

import (
	"time"

	oteltrace "go.opentelemetry.io/otel/trace"
	noopoteltrace "go.opentelemetry.io/otel/trace/noop"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apiserver/pkg/server/egressselector"
	"k8s.io/apiserver/pkg/storage/etcd3"
	"k8s.io/apiserver/pkg/storage/value"
	flowcontrolrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
)

const (
	StorageTypeUnset = ""
	StorageTypeETCD2 = "etcd2"
	StorageTypeETCD3 = "etcd3"

	DefaultCompactInterval      = 5 * time.Minute
	DefaultDBMetricPollInterval = 30 * time.Second
	DefaultEventsHistoryWindow  = 75 * time.Second
	DefaultHealthcheckTimeout   = 2 * time.Second
	DefaultReadinessTimeout     = 2 * time.Second
)

// TransportConfig holds all connection related info, i.e. equal TransportConfig means equal servers we talk to.
type TransportConfig struct {
	// ServerList is the list of storage servers to connect with.
	ServerList []string
	// TLS credentials
	KeyFile       string
	CertFile      string
	TrustedCAFile string
	// function to determine the egress dialer (i.e. konnectivity server dialer)
	EgressLookup egressselector.Lookup
	// The TracerProvider can add tracing to the connection.
	TracerProvider oteltrace.TracerProvider
}

// Config is configuration for creating a storage backend.
type Config struct {
	// Type defines the type of storage backend. Default ("") is "etcd3".
	Type string
	// Prefix is the prefix to all keys passed to storage.Interface methods.
	Prefix string
	// Transport holds all connection related info, i.e. equal TransportConfig means equal servers we talk to.
	Transport TransportConfig

	Codec runtime.Codec
	// EncodeVersioner is the same groupVersioner used to build the
	// storage encoder. Given a list of kinds the input object might belong
	// to, the EncodeVersioner outputs the gvk the object will be
	// converted to before being persisted in etcd.
	EncodeVersioner runtime.GroupVersioner
	// Transformer allows the value to be transformed prior to persisting into etcd.
	Transformer value.Transformer

	// CompactionInterval is the interval for requesting compaction from the apiserver.
	// If the value is 0, no compaction will be issued.
	CompactionInterval time.Duration
	// CountMetricPollPeriod specifies how often the count metric should be updated.
	CountMetricPollPeriod time.Duration
	// DBMetricPollInterval specifies how often the storage backend metric should be updated.
	DBMetricPollInterval time.Duration
	// EventsHistoryWindow specifies the minimum history duration that storage keeps.
	EventsHistoryWindow time.Duration
	// HealthcheckTimeout specifies the timeout used when checking health.
	HealthcheckTimeout time.Duration
	// ReadycheckTimeout specifies the timeout used when checking readiness.
	ReadycheckTimeout time.Duration

	LeaseManagerConfig etcd3.LeaseManagerConfig

	// StorageObjectCountTracker is used to keep track of the total
	// number of objects in the storage per resource.
	StorageObjectCountTracker flowcontrolrequest.StorageObjectCountTracker
}

// ConfigForResource is a Config specialized to a particular `schema.GroupResource`.
type ConfigForResource struct {
	// Config is the resource-independent configuration
	Config

	// GroupResource is the relevant one
	GroupResource schema.GroupResource
}

// ForResource specializes to the given resource.
func (config *Config) ForResource(resource schema.GroupResource) *ConfigForResource {
	return &ConfigForResource{
		Config:        *config,
		GroupResource: resource,
	}
}

func NewDefaultConfig(prefix string, codec runtime.Codec) *Config {
	return &Config{
		Prefix:               prefix,
		Codec:                codec,
		CompactionInterval:   DefaultCompactInterval,
		DBMetricPollInterval: DefaultDBMetricPollInterval,
		EventsHistoryWindow:  DefaultEventsHistoryWindow,
		HealthcheckTimeout:   DefaultHealthcheckTimeout,
		ReadycheckTimeout:    DefaultReadinessTimeout,
		LeaseManagerConfig:   etcd3.NewDefaultLeaseManagerConfig(),
		Transport:            TransportConfig{TracerProvider: noopoteltrace.NewTracerProvider()},
	}
}
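
As the defaults above suggest, callers build a Config once and then specialize it per resource. A minimal construction sketch (the server list and codec are placeholders, not taken from this diff):

// Sketch: building a default backend config and specializing it per resource.
cfg := storagebackend.NewDefaultConfig("/registry", codec) // codec is a placeholder
cfg.Transport.ServerList = []string{"https://127.0.0.1:2379"}

// ForResource copies the Config by value, so per-resource tweaks don't leak back.
podCfg := cfg.ForResource(schema.GroupResource{Resource: "pods"})
_ = podCfg // handed to the storage factory (see factory.Create below)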
503
e2e/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go
generated
vendored
@ -1,503 +0,0 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package factory
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus"
|
||||
"go.etcd.io/etcd/client/pkg/v3/logutil"
|
||||
"go.etcd.io/etcd/client/pkg/v3/transport"
|
||||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
"go.etcd.io/etcd/client/v3/kubernetes"
|
||||
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
"golang.org/x/time/rate"
|
||||
"google.golang.org/grpc"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
utilnet "k8s.io/apimachinery/pkg/util/net"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
genericfeatures "k8s.io/apiserver/pkg/features"
|
||||
"k8s.io/apiserver/pkg/server/egressselector"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/apiserver/pkg/storage/etcd3"
|
||||
"k8s.io/apiserver/pkg/storage/etcd3/metrics"
|
||||
"k8s.io/apiserver/pkg/storage/storagebackend"
|
||||
"k8s.io/apiserver/pkg/storage/value/encrypt/identity"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
tracing "k8s.io/component-base/tracing"
|
||||
)
|
||||
|
||||
const (
|
||||
// The short keepalive timeout and interval have been chosen to aggressively
|
||||
// detect a failed etcd server without introducing much overhead.
|
||||
keepaliveTime = 30 * time.Second
|
||||
keepaliveTimeout = 10 * time.Second
|
||||
|
||||
// dialTimeout is the timeout for failing to establish a connection.
|
||||
// It is set to 20 seconds as times shorter than that will cause TLS connections to fail
|
||||
// on heavily loaded arm64 CPUs (issue #64649)
|
||||
dialTimeout = 20 * time.Second
|
||||
|
||||
dbMetricsMonitorJitter = 0.5
|
||||
)
|
||||
|
||||
// TODO(negz): Stop using a package scoped logger. At the time of writing we're
|
||||
// creating an etcd client for each CRD. We need to pass each etcd client a
|
||||
// logger or each client will create its own, which comes with a significant
|
||||
// memory cost (around 20% of the API server's memory when hundreds of CRDs are
|
||||
// present). The correct fix here is to not create a client per CRD. See
|
||||
// https://github.com/kubernetes/kubernetes/issues/111476 for more.
|
||||
var etcd3ClientLogger *zap.Logger
|
||||
|
||||
func init() {
|
||||
// grpcprom auto-registers (via an init function) their client metrics, since we are opting out of
|
||||
// using the global prometheus registry and using our own wrapped global registry,
|
||||
// we need to explicitly register these metrics to our global registry here.
|
||||
// For reference: https://github.com/kubernetes/kubernetes/pull/81387
|
||||
legacyregistry.RawMustRegister(grpcprom.DefaultClientMetrics)
|
||||
dbMetricsMonitors = make(map[string]struct{})
|
||||
|
||||
l, err := logutil.CreateDefaultZapLogger(etcdClientDebugLevel())
|
||||
if err != nil {
|
||||
l = zap.NewNop()
|
||||
}
|
||||
etcd3ClientLogger = l.Named("etcd-client")
|
||||
}
|
||||
|
||||
// etcdClientDebugLevel translates ETCD_CLIENT_DEBUG into zap log level.
|
||||
// NOTE(negz): This is a copy of a private etcd client function:
|
||||
// https://github.com/etcd-io/etcd/blob/v3.5.4/client/v3/logger.go#L47
|
||||
func etcdClientDebugLevel() zapcore.Level {
|
||||
envLevel := os.Getenv("ETCD_CLIENT_DEBUG")
|
||||
if envLevel == "" || envLevel == "true" {
|
||||
return zapcore.InfoLevel
|
||||
}
|
||||
var l zapcore.Level
|
||||
if err := l.Set(envLevel); err == nil {
|
||||
log.Printf("Deprecated env ETCD_CLIENT_DEBUG value. Using default level: 'info'")
|
||||
return zapcore.InfoLevel
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func newETCD3HealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
|
||||
timeout := storagebackend.DefaultHealthcheckTimeout
|
||||
if c.HealthcheckTimeout != time.Duration(0) {
|
||||
timeout = c.HealthcheckTimeout
|
||||
}
|
||||
return newETCD3Check(c, timeout, stopCh)
|
||||
}
|
||||
|
||||
func newETCD3ReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
|
||||
timeout := storagebackend.DefaultReadinessTimeout
|
||||
if c.ReadycheckTimeout != time.Duration(0) {
|
||||
timeout = c.ReadycheckTimeout
|
||||
}
|
||||
return newETCD3Check(c, timeout, stopCh)
|
||||
}
|
||||
|
||||
// atomic error acts as a cache for atomically store an error
|
||||
// the error is only updated if the timestamp is more recent than
|
||||
// current stored error.
|
||||
type atomicLastError struct {
|
||||
mu sync.RWMutex
|
||||
err error
|
||||
timestamp time.Time
|
||||
}
|
||||
|
||||
func (a *atomicLastError) Store(err error, t time.Time) {
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
if a.timestamp.IsZero() || a.timestamp.Before(t) {
|
||||
a.err = err
|
||||
a.timestamp = t
|
||||
}
|
||||
}
|
||||
|
||||
func (a *atomicLastError) Load() error {
|
||||
a.mu.RLock()
|
||||
defer a.mu.RUnlock()
|
||||
return a.err
|
||||
}
|
||||
|
||||
func newETCD3Check(c storagebackend.Config, timeout time.Duration, stopCh <-chan struct{}) (func() error, error) {
|
||||
// constructing the etcd v3 client blocks and times out if etcd is not available.
|
||||
// retry in a loop in the background until we successfully create the client, storing the client or error encountered
|
||||
|
||||
lock := sync.RWMutex{}
|
||||
var prober *etcd3ProberMonitor
|
||||
clientErr := fmt.Errorf("etcd client connection not yet established")
|
||||
|
||||
go wait.PollImmediateUntil(time.Second, func() (bool, error) {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
newProber, err := newETCD3ProberMonitor(c)
|
||||
// Ensure that server is already not shutting down.
|
||||
select {
|
||||
case <-stopCh:
|
||||
if err == nil {
|
||||
newProber.Close()
|
||||
}
|
||||
return true, nil
|
||||
default:
|
||||
}
|
||||
if err != nil {
|
||||
clientErr = err
|
||||
return false, nil
|
||||
}
|
||||
prober = newProber
|
||||
clientErr = nil
|
||||
return true, nil
|
||||
}, stopCh)
|
||||
|
||||
// Close the client on shutdown.
|
||||
go func() {
|
||||
defer utilruntime.HandleCrash()
|
||||
<-stopCh
|
||||
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
if prober != nil {
|
||||
prober.Close()
|
||||
clientErr = fmt.Errorf("server is shutting down")
|
||||
}
|
||||
}()
|
||||
|
||||
// limit to a request every half of the configured timeout with a maximum burst of one
|
||||
// rate limited requests will receive the last request sent error (note: not the last received response)
|
||||
limiter := rate.NewLimiter(rate.Every(timeout/2), 1)
|
||||
// initial state is the clientErr
|
||||
lastError := &atomicLastError{err: fmt.Errorf("etcd client connection not yet established")}
|
||||
|
||||
return func() error {
|
||||
// Given that client is closed on shutdown we hold the lock for
|
||||
// the entire period of healthcheck call to ensure that client will
|
||||
// not be closed during healthcheck.
|
||||
// Given that healthchecks has a 2s timeout, worst case of blocking
|
||||
// shutdown for additional 2s seems acceptable.
|
||||
lock.RLock()
|
||||
defer lock.RUnlock()
|
||||
|
||||
if clientErr != nil {
|
||||
return clientErr
|
||||
}
|
||||
if limiter.Allow() == false {
|
||||
return lastError.Load()
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
now := time.Now()
|
||||
err := prober.Probe(ctx)
|
||||
lastError.Store(err, now)
|
||||
return err
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newETCD3ProberMonitor(c storagebackend.Config) (*etcd3ProberMonitor, error) {
|
||||
client, err := newETCD3Client(c.Transport)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &etcd3ProberMonitor{
|
||||
client: client.Client,
|
||||
prefix: c.Prefix,
|
||||
endpoints: c.Transport.ServerList,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type etcd3ProberMonitor struct {
|
||||
prefix string
|
||||
endpoints []string
|
||||
|
||||
mux sync.RWMutex
|
||||
client *clientv3.Client
|
||||
closed bool
|
||||
}
|
||||
|
||||
func (t *etcd3ProberMonitor) Close() error {
|
||||
t.mux.Lock()
|
||||
defer t.mux.Unlock()
|
||||
if !t.closed {
|
||||
t.closed = true
|
||||
return t.client.Close()
|
||||
}
|
||||
return fmt.Errorf("closed")
|
||||
}
|
||||
|
||||
func (t *etcd3ProberMonitor) Probe(ctx context.Context) error {
|
||||
t.mux.RLock()
|
||||
defer t.mux.RUnlock()
|
||||
if t.closed {
|
||||
return fmt.Errorf("closed")
|
||||
}
|
||||
// See https://github.com/etcd-io/etcd/blob/c57f8b3af865d1b531b979889c602ba14377420e/etcdctl/ctlv3/command/ep_command.go#L118
|
||||
_, err := t.client.Get(ctx, path.Join("/", t.prefix, "health"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting data from etcd: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *etcd3ProberMonitor) Monitor(ctx context.Context) (metrics.StorageMetrics, error) {
|
||||
t.mux.RLock()
|
||||
defer t.mux.RUnlock()
|
||||
if t.closed {
|
||||
return metrics.StorageMetrics{}, fmt.Errorf("closed")
|
||||
}
|
||||
status, err := t.client.Status(ctx, t.endpoints[rand.Int()%len(t.endpoints)])
|
||||
if err != nil {
|
||||
return metrics.StorageMetrics{}, err
|
||||
}
|
||||
return metrics.StorageMetrics{
|
||||
Size: status.DbSize,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var newETCD3Client = func(c storagebackend.TransportConfig) (*kubernetes.Client, error) {
|
||||
tlsInfo := transport.TLSInfo{
|
||||
CertFile: c.CertFile,
|
||||
KeyFile: c.KeyFile,
|
||||
TrustedCAFile: c.TrustedCAFile,
|
||||
}
|
||||
tlsConfig, err := tlsInfo.ClientConfig()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// NOTE: Client relies on nil tlsConfig
|
||||
// for non-secure connections, update the implicit variable
|
||||
if len(c.CertFile) == 0 && len(c.KeyFile) == 0 && len(c.TrustedCAFile) == 0 {
|
||||
tlsConfig = nil
|
||||
}
|
||||
networkContext := egressselector.Etcd.AsNetworkContext()
|
||||
var egressDialer utilnet.DialFunc
|
||||
if c.EgressLookup != nil {
|
||||
egressDialer, err = c.EgressLookup(networkContext)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
dialOptions := []grpc.DialOption{
|
||||
grpc.WithBlock(), // block until the underlying connection is up
|
||||
// use chained interceptors so that the default (retry and backoff) interceptors are added.
|
||||
// otherwise they will be overwritten by the metric interceptor.
|
||||
//
|
||||
// these optional interceptors will be placed after the default ones.
|
||||
// which seems to be what we want as the metrics will be collected on each attempt (retry)
|
||||
grpc.WithChainUnaryInterceptor(grpcprom.UnaryClientInterceptor),
|
||||
grpc.WithChainStreamInterceptor(grpcprom.StreamClientInterceptor),
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.APIServerTracing) {
|
||||
tracingOpts := []otelgrpc.Option{
|
||||
otelgrpc.WithMessageEvents(otelgrpc.ReceivedEvents, otelgrpc.SentEvents),
|
||||
otelgrpc.WithPropagators(tracing.Propagators()),
|
||||
otelgrpc.WithTracerProvider(c.TracerProvider),
|
||||
}
|
||||
// Even with Noop TracerProvider, the otelgrpc still handles context propagation.
|
||||
// See https://github.com/open-telemetry/opentelemetry-go/tree/main/example/passthrough
|
||||
dialOptions = append(dialOptions,
|
||||
grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor(tracingOpts...)),
|
||||
grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor(tracingOpts...)))
|
||||
}
|
||||
if egressDialer != nil {
|
||||
dialer := func(ctx context.Context, addr string) (net.Conn, error) {
|
||||
if strings.Contains(addr, "//") {
|
||||
// etcd client prior to 3.5 passed URLs to dialer, normalize to address
|
||||
u, err := url.Parse(addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
addr = u.Host
|
||||
}
|
||||
return egressDialer(ctx, "tcp", addr)
|
||||
}
|
||||
dialOptions = append(dialOptions, grpc.WithContextDialer(dialer))
|
||||
}
|
||||
|
||||
cfg := clientv3.Config{
|
||||
DialTimeout: dialTimeout,
|
||||
DialKeepAliveTime: keepaliveTime,
|
||||
DialKeepAliveTimeout: keepaliveTimeout,
|
||||
DialOptions: dialOptions,
|
||||
Endpoints: c.ServerList,
|
||||
TLS: tlsConfig,
|
||||
Logger: etcd3ClientLogger,
|
||||
}
|
||||
|
||||
return kubernetes.New(cfg)
|
||||
}
|
||||
|
||||
type runningCompactor struct {
|
||||
interval time.Duration
|
||||
cancel context.CancelFunc
|
||||
client *clientv3.Client
|
||||
refs int
|
||||
}
|
||||
|
||||
var (
|
||||
// compactorsMu guards access to compactors map
|
||||
compactorsMu sync.Mutex
|
||||
compactors = map[string]*runningCompactor{}
|
||||
// dbMetricsMonitorsMu guards access to dbMetricsMonitors map
|
||||
dbMetricsMonitorsMu sync.Mutex
|
||||
dbMetricsMonitors map[string]struct{}
|
||||
)
|
||||
|
||||
// startCompactorOnce start one compactor per transport. If the interval get smaller on repeated calls, the
|
||||
// compactor is replaced. A destroy func is returned. If all destroy funcs with the same transport are called,
|
||||
// the compactor is stopped.
|
||||
func startCompactorOnce(c storagebackend.TransportConfig, interval time.Duration) (func(), error) {
|
||||
compactorsMu.Lock()
|
||||
	defer compactorsMu.Unlock()

	if interval == 0 {
		// short circuit, if the compaction request from apiserver is disabled
		return func() {}, nil
	}
	key := fmt.Sprintf("%v", c) // gives: {[server1 server2] keyFile certFile caFile}
	if compactor, foundBefore := compactors[key]; !foundBefore || compactor.interval > interval {
		client, err := newETCD3Client(c)
		if err != nil {
			return nil, err
		}
		compactorClient := client.Client

		if foundBefore {
			// replace compactor
			compactor.cancel()
			compactor.client.Close()
		} else {
			// start new compactor
			compactor = &runningCompactor{}
			compactors[key] = compactor
		}

		ctx, cancel := context.WithCancel(context.Background())

		compactor.interval = interval
		compactor.cancel = cancel
		compactor.client = compactorClient

		etcd3.StartCompactor(ctx, compactorClient, interval)
	}

	compactors[key].refs++

	return func() {
		compactorsMu.Lock()
		defer compactorsMu.Unlock()

		compactor := compactors[key]
		compactor.refs--
		if compactor.refs == 0 {
			compactor.cancel()
			compactor.client.Close()
			delete(compactors, key)
		}
	}, nil
}

func newETCD3Storage(c storagebackend.ConfigForResource, newFunc, newListFunc func() runtime.Object, resourcePrefix string) (storage.Interface, DestroyFunc, error) {
	stopCompactor, err := startCompactorOnce(c.Transport, c.CompactionInterval)
	if err != nil {
		return nil, nil, err
	}

	client, err := newETCD3Client(c.Transport)
	if err != nil {
		stopCompactor()
		return nil, nil, err
	}

	// decorate the KV instance so we can track etcd latency per request.
	client.KV = etcd3.NewETCDLatencyTracker(client.KV)

	stopDBSizeMonitor, err := startDBSizeMonitorPerEndpoint(client.Client, c.DBMetricPollInterval)
	if err != nil {
		return nil, nil, err
	}

	var once sync.Once
	destroyFunc := func() {
		// we know that storage destroy funcs are called multiple times (due to reuse in subresources).
		// Hence, we only destroy once.
		// TODO: fix duplicated storage destroy calls higher level
		once.Do(func() {
			stopCompactor()
			stopDBSizeMonitor()
			client.Close()
		})
	}
	transformer := c.Transformer
	if transformer == nil {
		transformer = identity.NewEncryptCheckTransformer()
	}

	versioner := storage.APIObjectVersioner{}
	decoder := etcd3.NewDefaultDecoder(c.Codec, versioner)
	store := etcd3.New(client, c.Codec, newFunc, newListFunc, c.Prefix, resourcePrefix, c.GroupResource, transformer, c.LeaseManagerConfig, decoder, versioner)
	return store, destroyFunc, nil
}

// startDBSizeMonitorPerEndpoint starts a loop to monitor etcd database size and update the
// corresponding metric etcd_db_total_size_in_bytes for each etcd server endpoint.
// Deprecated: Will be replaced with newETCD3ProberMonitor
func startDBSizeMonitorPerEndpoint(client *clientv3.Client, interval time.Duration) (func(), error) {
	if interval == 0 {
		return func() {}, nil
	}
	dbMetricsMonitorsMu.Lock()
	defer dbMetricsMonitorsMu.Unlock()

	ctx, cancel := context.WithCancel(context.Background())
	for _, ep := range client.Endpoints() {
		if _, found := dbMetricsMonitors[ep]; found {
			continue
		}
		dbMetricsMonitors[ep] = struct{}{}
		endpoint := ep
		klog.V(4).Infof("Start monitoring storage db size metric for endpoint %s with polling interval %v", endpoint, interval)
		go wait.JitterUntilWithContext(ctx, func(context.Context) {
			epStatus, err := client.Maintenance.Status(ctx, endpoint)
			if err != nil {
				klog.V(4).Infof("Failed to get storage db size for ep %s: %v", endpoint, err)
				metrics.UpdateEtcdDbSize(endpoint, -1)
			} else {
				metrics.UpdateEtcdDbSize(endpoint, epStatus.DbSize)
			}
		}, interval, dbMetricsMonitorJitter, true)
	}

	return func() {
		cancel()
	}, nil
}
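The compactor registry above is reference-counted per transport config. A hypothetical illustration of that contract (transportCfg is a placeholder, not part of the vendored code):

// two resources sharing one etcd transport reuse a single compactor
stopPods, _ := startCompactorOnce(transportCfg, time.Minute)
stopNodes, _ := startCompactorOnce(transportCfg, time.Minute) // refs == 2, no second compactor started

stopPods()  // refs == 1, compaction keeps running
stopNodes() // refs == 0, context cancelled, etcd client closed, entry deleted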
93
e2e/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/factory.go
generated
vendored
@ -1,93 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package factory

import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apiserver/pkg/storage"
	"k8s.io/apiserver/pkg/storage/etcd3/metrics"
	"k8s.io/apiserver/pkg/storage/storagebackend"
)

// DestroyFunc is to destroy any resources used by the storage returned in Create() together.
type DestroyFunc func()

// Create creates a storage backend based on given config.
func Create(c storagebackend.ConfigForResource, newFunc, newListFunc func() runtime.Object, resourcePrefix string) (storage.Interface, DestroyFunc, error) {
	switch c.Type {
	case storagebackend.StorageTypeETCD2:
		return nil, nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
	case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
		return newETCD3Storage(c, newFunc, newListFunc, resourcePrefix)
	default:
		return nil, nil, fmt.Errorf("unknown storage type: %s", c.Type)
	}
}

// CreateHealthCheck creates a healthcheck function based on given config.
func CreateHealthCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
	switch c.Type {
	case storagebackend.StorageTypeETCD2:
		return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
	case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
		return newETCD3HealthCheck(c, stopCh)
	default:
		return nil, fmt.Errorf("unknown storage type: %s", c.Type)
	}
}

// CreateReadyCheck creates a readiness check function based on given config.
func CreateReadyCheck(c storagebackend.Config, stopCh <-chan struct{}) (func() error, error) {
	switch c.Type {
	case storagebackend.StorageTypeETCD2:
		return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
	case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
		return newETCD3ReadyCheck(c, stopCh)
	default:
		return nil, fmt.Errorf("unknown storage type: %s", c.Type)
	}
}

// CreateProber creates a Prober based on given config.
func CreateProber(c storagebackend.Config) (Prober, error) {
	switch c.Type {
	case storagebackend.StorageTypeETCD2:
		return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
	case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
		return newETCD3ProberMonitor(c)
	default:
		return nil, fmt.Errorf("unknown storage type: %s", c.Type)
	}
}

// CreateMonitor creates a metrics.Monitor based on given config.
func CreateMonitor(c storagebackend.Config) (metrics.Monitor, error) {
	switch c.Type {
	case storagebackend.StorageTypeETCD2:
		return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type)
	case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
		return newETCD3ProberMonitor(c)
	default:
		return nil, fmt.Errorf("unknown storage type: %s", c.Type)
	}
}

// Prober is an interface that defines the Probe function for doing etcd readiness/liveness checks.
type Prober interface {
	Probe(ctx context.Context) error
	Close() error
}
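For orientation, a minimal sketch of how a caller drives this factory; codec, groupResource, newFunc and newListFunc are placeholders and error handling is abbreviated:

// Hypothetical call site, assuming a fully populated storagebackend config.
cfg := storagebackend.NewDefaultConfig("/registry", codec).ForResource(groupResource)
store, destroy, err := factory.Create(*cfg, newFunc, newListFunc, "/registry/pods")
if err != nil {
	// StorageTypeETCD2 and unknown types fail here; an unset type falls through to etcd3
	panic(err)
}
defer destroy() // releases the shared compactor ref, DB size monitor, and etcd client
_ = store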
8
e2e/vendor/k8s.io/apiserver/pkg/storage/value/OWNERS
generated
vendored
@ -1,8 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
  - sig-auth-encryption-at-rest-approvers
reviewers:
  - sig-auth-encryption-at-rest-reviewers
labels:
  - sig/auth
273
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/aes/aes.go
generated
vendored
@ -1,273 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package aes transforms values for storage at rest using AES-GCM.
package aes

import (
	"bytes"
	"context"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync/atomic"
	"time"

	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/klog/v2"
)

// commonSize is the length of various security sensitive byte slices such as encryption keys.
// Do not change this value. It would be a backward incompatible change.
const commonSize = 32

const keySizeCounterNonceGCM = commonSize

// NewGCMTransformerWithUniqueKeyUnsafe is the same as NewGCMTransformer but is unsafe for general
// use because it makes assumptions about the key underlying the block cipher. Specifically,
// it uses a 96-bit nonce where the first 32 bits are random data and the remaining 64 bits are
// a monotonically incrementing atomic counter. This means that the key must be randomly generated
// on process startup and must never be used for encryption outside the lifetime of the process.
// Unlike NewGCMTransformer, this function is immune to the birthday attack and thus the key can
// be used for 2^64-1 writes without rotation. Furthermore, cryptographic wear out of AES-GCM with
// a sequential nonce occurs after 2^64 encryptions, which is not a concern for our use cases.
// Even if that occurs, the nonce counter would overflow and crash the process. We have no concerns
// around plaintext length because all stored items are small (less than 2 MB). To prevent the
// chance of the block cipher being accidentally re-used, it is not taken in as input. Instead,
// a new random key is generated and returned on every invocation of this function. This key is
// used as the input to the block cipher. If the key is stored and retrieved at a later point,
// it can be passed to NewGCMTransformer(aes.NewCipher(key)) to construct a transformer capable
// of decrypting values encrypted by this transformer (that transformer must not be used for encryption).
func NewGCMTransformerWithUniqueKeyUnsafe() (value.Transformer, []byte, error) {
	key, err := GenerateKey(keySizeCounterNonceGCM)
	if err != nil {
		return nil, nil, err
	}
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, nil, err
	}

	nonceGen := &nonceGenerator{
		// we start the nonce counter at one billion so that we are
		// guaranteed to detect rollover across different go routines
		zero:  1_000_000_000,
		fatal: die,
	}
	nonceGen.nonce.Add(nonceGen.zero)

	transformer, err := newGCMTransformerWithUniqueKeyUnsafe(block, nonceGen)
	if err != nil {
		return nil, nil, err
	}
	return transformer, key, nil
}

func newGCMTransformerWithUniqueKeyUnsafe(block cipher.Block, nonceGen *nonceGenerator) (value.Transformer, error) {
	aead, err := newGCM(block)
	if err != nil {
		return nil, err
	}

	nonceFunc := func(b []byte) error {
		// we only need 8 bytes to store our 64 bit incrementing nonce
		// instead of leaving the unused bytes as zeros, set those to random bits
		// this mostly protects us from weird edge cases like a VM restore that rewinds our atomic counter
		randNonceSize := len(b) - 8

		if err := randomNonce(b[:randNonceSize]); err != nil {
			return err
		}

		nonceGen.next(b[randNonceSize:])

		return nil
	}

	return &gcm{aead: aead, nonceFunc: nonceFunc}, nil
}

func randomNonce(b []byte) error {
	_, err := rand.Read(b)
	return err
}

type nonceGenerator struct {
	// even at one million encryptions per second, this counter is enough for half a million years
	// using this struct avoids alignment bugs: https://pkg.go.dev/sync/atomic#pkg-note-BUG
	nonce atomic.Uint64
	zero  uint64
	fatal func(msg string)
}

func (n *nonceGenerator) next(b []byte) {
	incrementingNonce := n.nonce.Add(1)
	if incrementingNonce <= n.zero {
		// this should never happen, and is unrecoverable if it does
		n.fatal("aes-gcm detected nonce overflow - cryptographic wear out has occurred")
	}
	binary.LittleEndian.PutUint64(b, incrementingNonce)
}

func die(msg string) {
	// nolint:logcheck // we want the stack traces, log flushing, and process exiting logic from FatalDepth
	klog.FatalDepth(1, msg)
}

// GenerateKey generates a random key using system randomness.
func GenerateKey(length int) (key []byte, err error) {
	defer func(start time.Time) {
		value.RecordDataKeyGeneration(start, err)
	}(time.Now())
	key = make([]byte, length)
	if _, err = rand.Read(key); err != nil {
		return nil, err
	}

	return key, nil
}

// NewGCMTransformer takes the given block cipher and performs encryption and decryption on the given data.
// It implements AEAD encryption of the provided values given a cipher.Block algorithm.
// The authenticated data provided as part of the value.Context method must match when the same
// value is set to and loaded from storage. In order to ensure that values cannot be copied by
// an attacker from a location under their control, use characteristics of the storage location
// (such as the etcd key) as part of the authenticated data.
//
// Because this mode requires a generated IV and IV reuse is a known weakness of AES-GCM, keys
// must be rotated before a birthday attack becomes feasible. NIST SP 800-38D
// (http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf) recommends using the same
// key with random 96-bit nonces (the default nonce length) no more than 2^32 times, and
// therefore transformers using this implementation *must* ensure they allow for frequent key
// rotation. Future work should include investigation of AES-GCM-SIV as an alternative to
// random nonces.
func NewGCMTransformer(block cipher.Block) (value.Transformer, error) {
	aead, err := newGCM(block)
	if err != nil {
		return nil, err
	}

	return &gcm{aead: aead, nonceFunc: randomNonce}, nil
}

func newGCM(block cipher.Block) (cipher.AEAD, error) {
	aead, err := cipher.NewGCM(block)
	if err != nil {
		return nil, err
	}
	if nonceSize := aead.NonceSize(); nonceSize != 12 { // all data in etcd will be broken if this ever changes
		return nil, fmt.Errorf("crypto/cipher.NewGCM returned unexpected nonce size: %d", nonceSize)
	}
	return aead, nil
}

type gcm struct {
	aead      cipher.AEAD
	nonceFunc func([]byte) error
}

func (t *gcm) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	nonceSize := t.aead.NonceSize()
	if len(data) < nonceSize {
		return nil, false, errors.New("the stored data was shorter than the required size")
	}
	result, err := t.aead.Open(nil, data[:nonceSize], data[nonceSize:], dataCtx.AuthenticatedData())
	return result, false, err
}

func (t *gcm) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	nonceSize := t.aead.NonceSize()
	result := make([]byte, nonceSize+t.aead.Overhead()+len(data))

	if err := t.nonceFunc(result[:nonceSize]); err != nil {
		return nil, fmt.Errorf("failed to write nonce for AES-GCM: %w", err)
	}

	cipherText := t.aead.Seal(result[nonceSize:nonceSize], result[:nonceSize], data, dataCtx.AuthenticatedData())
	return result[:nonceSize+len(cipherText)], nil
}

// cbc implements encryption at rest of the provided values given a cipher.Block algorithm.
type cbc struct {
	block cipher.Block
}

// NewCBCTransformer takes the given block cipher and performs encryption and decryption on the given
// data.
func NewCBCTransformer(block cipher.Block) value.Transformer {
	return &cbc{block: block}
}

var (
	errInvalidBlockSize    = errors.New("the stored data is not a multiple of the block size")
	errInvalidPKCS7Data    = errors.New("invalid PKCS7 data (empty or not padded)")
	errInvalidPKCS7Padding = errors.New("invalid padding on input")
)

func (t *cbc) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	blockSize := aes.BlockSize
	if len(data) < blockSize {
		return nil, false, errors.New("the stored data was shorter than the required size")
	}
	iv := data[:blockSize]
	data = data[blockSize:]

	if len(data)%blockSize != 0 {
		return nil, false, errInvalidBlockSize
	}

	result := make([]byte, len(data))
	copy(result, data)
	mode := cipher.NewCBCDecrypter(t.block, iv)
	mode.CryptBlocks(result, result)

	// remove and verify PKCS#7 padding for CBC
	c := result[len(result)-1]
	paddingSize := int(c)
	size := len(result) - paddingSize
	if paddingSize == 0 || paddingSize > len(result) {
		return nil, false, errInvalidPKCS7Data
	}
	for i := 0; i < paddingSize; i++ {
		if result[size+i] != c {
			return nil, false, errInvalidPKCS7Padding
		}
	}

	return result[:size], false, nil
}

func (t *cbc) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	blockSize := aes.BlockSize
	paddingSize := blockSize - (len(data) % blockSize)
	result := make([]byte, blockSize+len(data)+paddingSize)
	iv := result[:blockSize]
	if _, err := io.ReadFull(rand.Reader, iv); err != nil {
		return nil, errors.New("unable to read sufficient random bytes")
	}
	copy(result[blockSize:], data)

	// add PKCS#7 padding for CBC
	copy(result[blockSize+len(data):], bytes.Repeat([]byte{byte(paddingSize)}, paddingSize))

	mode := cipher.NewCBCEncrypter(t.block, iv)
	mode.CryptBlocks(result[blockSize:], result[blockSize:])
	return result, nil
}
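A minimal roundtrip sketch for the GCM transformer above; value.DefaultContext is the stock value.Context implementation, and the key/path values are illustrative only:

package main

import (
	"context"
	"crypto/aes"
	"fmt"

	"k8s.io/apiserver/pkg/storage/value"
	aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
)

func main() {
	key, err := aestransformer.GenerateKey(32) // 32 bytes selects AES-256
	if err != nil {
		panic(err)
	}
	block, err := aes.NewCipher(key)
	if err != nil {
		panic(err)
	}
	t, err := aestransformer.NewGCMTransformer(block)
	if err != nil {
		panic(err)
	}
	// binding the etcd key as authenticated data prevents ciphertext relocation
	dataCtx := value.DefaultContext("/registry/secrets/default/example")
	ct, err := t.TransformToStorage(context.Background(), []byte("plaintext"), dataCtx)
	if err != nil {
		panic(err)
	}
	pt, _, err := t.TransformFromStorage(context.Background(), ct, dataCtx)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(pt)) // "plaintext"
}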
186
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/aes/aes_extended_nonce.go
generated
vendored
@ -1,186 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aes

import (
	"bytes"
	"context"
	"crypto/aes"
	"crypto/sha256"
	"errors"
	"fmt"
	"io"
	"time"

	"golang.org/x/crypto/hkdf"

	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/utils/clock"
)

const (
	// cacheTTL is the TTL of KDF cache entries. We assume that the value.Context.AuthenticatedData
	// for every call is the etcd storage path of the associated resource, and use that as the primary
	// cache key (with a secondary check that confirms that the info matches). Thus if a client
	// is constantly creating resources with new names (and thus new paths), they will keep adding new
	// entries to the cache for up to this TTL before the GC logic starts deleting old entries. Each
	// entry is ~300 bytes in size, so even a malicious client will be bounded in the overall memory
	// it can consume.
	cacheTTL = 10 * time.Minute

	derivedKeySizeExtendedNonceGCM = commonSize
	infoSizeExtendedNonceGCM
	MinSeedSizeExtendedNonceGCM
)

// NewHKDFExtendedNonceGCMTransformer is the same as NewGCMTransformer but trades storage,
// memory and CPU to work around the limitations of AES-GCM's 12 byte nonce size. The input seed
// is assumed to be a cryptographically strong slice of MinSeedSizeExtendedNonceGCM+ random bytes.
// Unlike NewGCMTransformer, this function is immune to the birthday attack because a new key is generated
// per encryption via a key derivation function: KDF(seed, random_bytes) -> key. The derived key is
// only used once as an AES-GCM key with a random 12 byte nonce. This avoids any concerns around
// cryptographic wear out (by either number of encryptions or the amount of data being encrypted).
// Regarding cryptographic safety, the limit on the number of operations that can be performed
// with a single seed with derived keys and randomly generated nonces is not practically reachable.
// Thus, the scheme does not impose any specific requirements on the seed rotation schedule.
// Reusing the same seed is safe to do over time and across process restarts. Whenever a new
// seed is needed, the caller should generate it via GenerateKey(MinSeedSizeExtendedNonceGCM).
// In regard to KMSv2, organization standards or compliance policies around rotation may require
// that the seed be rotated at some interval. This can be implemented externally by rotating
// the key encryption key via a key ID change.
func NewHKDFExtendedNonceGCMTransformer(seed []byte) (value.Transformer, error) {
	if seedLen := len(seed); seedLen < MinSeedSizeExtendedNonceGCM {
		return nil, fmt.Errorf("invalid seed length %d used for key generation", seedLen)
	}
	return &extendedNonceGCM{
		seed:  seed,
		cache: newSimpleCache(clock.RealClock{}, cacheTTL),
	}, nil
}

type extendedNonceGCM struct {
	seed  []byte
	cache *simpleCache
}

func (e *extendedNonceGCM) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	if len(data) < infoSizeExtendedNonceGCM {
		return nil, false, errors.New("the stored data was shorter than the required size")
	}

	info := data[:infoSizeExtendedNonceGCM]

	transformer, err := e.derivedKeyTransformer(info, dataCtx, false)
	if err != nil {
		return nil, false, fmt.Errorf("failed to derive read key from KDF: %w", err)
	}

	return transformer.TransformFromStorage(ctx, data, dataCtx)
}

func (e *extendedNonceGCM) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	info := make([]byte, infoSizeExtendedNonceGCM)
	if err := randomNonce(info); err != nil {
		return nil, fmt.Errorf("failed to generate info for KDF: %w", err)
	}

	transformer, err := e.derivedKeyTransformer(info, dataCtx, true)
	if err != nil {
		return nil, fmt.Errorf("failed to derive write key from KDF: %w", err)
	}

	return transformer.TransformToStorage(ctx, data, dataCtx)
}

func (e *extendedNonceGCM) derivedKeyTransformer(info []byte, dataCtx value.Context, write bool) (value.Transformer, error) {
	if !write { // no need to check cache on write since we always generate a new transformer
		if transformer := e.cache.get(info, dataCtx); transformer != nil {
			return transformer, nil
		}

		// on read, this is a subslice of a much larger slice and we do not want to hold onto that larger slice
		info = bytes.Clone(info)
	}

	key, err := e.sha256KDFExpandOnly(info)
	if err != nil {
		return nil, fmt.Errorf("failed to KDF expand seed with info: %w", err)
	}

	transformer, err := newGCMTransformerWithInfo(key, info)
	if err != nil {
		return nil, fmt.Errorf("failed to build transformer with KDF derived key: %w", err)
	}

	e.cache.set(dataCtx, transformer)

	return transformer, nil
}

func (e *extendedNonceGCM) sha256KDFExpandOnly(info []byte) ([]byte, error) {
	kdf := hkdf.Expand(sha256.New, e.seed, info)

	derivedKey := make([]byte, derivedKeySizeExtendedNonceGCM)
	if _, err := io.ReadFull(kdf, derivedKey); err != nil {
		return nil, fmt.Errorf("failed to read a derived key from KDF: %w", err)
	}

	return derivedKey, nil
}

func newGCMTransformerWithInfo(key, info []byte) (*transformerWithInfo, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}

	transformer, err := NewGCMTransformer(block)
	if err != nil {
		return nil, err
	}

	return &transformerWithInfo{transformer: transformer, info: info}, nil
}

type transformerWithInfo struct {
	transformer value.Transformer
	// info are extra opaque bytes prepended to the writes from transformer and stripped from reads.
	// currently info is used to generate a key via KDF(seed, info) -> key
	// and transformer is the output of NewGCMTransformer(aes.NewCipher(key))
	info []byte
}

func (t *transformerWithInfo) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	if !bytes.HasPrefix(data, t.info) {
		return nil, false, errors.New("the stored data is missing the required info prefix")
	}

	return t.transformer.TransformFromStorage(ctx, data[len(t.info):], dataCtx)
}

func (t *transformerWithInfo) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	out, err := t.transformer.TransformToStorage(ctx, data, dataCtx)
	if err != nil {
		return nil, err
	}

	outWithInfo := make([]byte, 0, len(out)+len(t.info))
	outWithInfo = append(outWithInfo, t.info...)
	outWithInfo = append(outWithInfo, out...)

	return outWithInfo, nil
}
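A usage sketch for the extended-nonce transformer; in production the seed would be generated once and wrapped by a KMS plugin, here it is created locally for illustration:

package main

import (
	"context"
	"fmt"

	"k8s.io/apiserver/pkg/storage/value"
	aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
)

func main() {
	seed, err := aestransformer.GenerateKey(aestransformer.MinSeedSizeExtendedNonceGCM)
	if err != nil {
		panic(err)
	}
	t, err := aestransformer.NewHKDFExtendedNonceGCMTransformer(seed)
	if err != nil {
		panic(err)
	}
	// the authenticated data doubles as the KDF cache key, so reuse the storage path
	dataCtx := value.DefaultContext("/registry/secrets/default/example")
	ct, _ := t.TransformToStorage(context.Background(), []byte("plaintext"), dataCtx)
	pt, _, _ := t.TransformFromStorage(context.Background(), ct, dataCtx)
	fmt.Println(string(pt)) // "plaintext"; each write used a freshly derived key
}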
91
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/aes/cache.go
generated
vendored
@ -1,91 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aes

import (
	"bytes"
	"time"
	"unsafe"

	utilcache "k8s.io/apimachinery/pkg/util/cache"
	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/utils/clock"
)

type simpleCache struct {
	cache *utilcache.Expiring
	ttl   time.Duration
}

func newSimpleCache(clock clock.Clock, ttl time.Duration) *simpleCache {
	cache := utilcache.NewExpiringWithClock(clock)
	// "Stale" entries are always valid for us because the TTL is just used to prevent
	// unbounded growth on the cache - for a given info the transformer is always the same.
	// The key always corresponds to the exact same value, with the caveat that
	// since we use the value.Context.AuthenticatedData to overwrite old keys,
	// we always have to check that the info matches (to validate the transformer is correct).
	cache.AllowExpiredGet = true
	return &simpleCache{
		cache: cache,
		ttl:   ttl,
	}
}

// given a key, return the transformer, or nil if it does not exist in the cache
func (c *simpleCache) get(info []byte, dataCtx value.Context) *transformerWithInfo {
	val, ok := c.cache.Get(keyFunc(dataCtx))
	if !ok {
		return nil
	}

	transformer := val.(*transformerWithInfo)

	if !bytes.Equal(transformer.info, info) {
		return nil
	}

	return transformer
}

// set caches the record for the key
func (c *simpleCache) set(dataCtx value.Context, transformer *transformerWithInfo) {
	if dataCtx == nil || len(dataCtx.AuthenticatedData()) == 0 {
		panic("authenticated data must not be empty")
	}
	if transformer == nil {
		panic("transformer must not be nil")
	}
	if len(transformer.info) == 0 {
		panic("info must not be empty")
	}
	c.cache.Set(keyFunc(dataCtx), transformer, c.ttl)
}

func keyFunc(dataCtx value.Context) string {
	return toString(dataCtx.AuthenticatedData())
}

// toString performs unholy acts to avoid allocations
func toString(b []byte) string {
	// unsafe.SliceData relies on cap whereas we want to rely on len
	if len(b) == 0 {
		return ""
	}
	// Copied from go 1.20.1 strings.Builder.String
	// https://github.com/golang/go/blob/202a1a57064127c3f19d96df57b9f9586145e21c/src/strings/builder.go#L48
	return unsafe.String(unsafe.SliceData(b), len(b))
}
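The zero-copy conversion in toString is the performance-sensitive piece; a short standalone sketch of the contract it relies on, using only the standard library:

b := []byte("abc")
s := unsafe.String(unsafe.SliceData(b), len(b)) // s aliases b's bytes, no copy
// contract: b must not be mutated while s is in use. The cache can rely on this
// because authenticated-data buffers are treated as immutable once handed in.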
202
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/envelope.go
generated
vendored
@ -1,202 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package envelope transforms values for storage at rest using an Envelope provider
package envelope

import (
	"context"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"encoding/base64"
	"fmt"
	"time"

	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
	"k8s.io/utils/lru"

	"golang.org/x/crypto/cryptobyte"
)

func init() {
	value.RegisterMetrics()
	metrics.RegisterMetrics()
}

// Service allows encrypting and decrypting data using an external Key Management Service.
type Service interface {
	// Decrypt a given bytearray to obtain the original data as bytes.
	Decrypt(data []byte) ([]byte, error)
	// Encrypt bytes to a ciphertext.
	Encrypt(data []byte) ([]byte, error)
}

type envelopeTransformer struct {
	envelopeService Service

	// transformers is a thread-safe LRU cache which caches decrypted DEKs indexed by their encrypted form.
	transformers *lru.Cache

	// baseTransformerFunc creates a new transformer for encrypting the data with the DEK.
	baseTransformerFunc func(cipher.Block) (value.Transformer, error)

	cacheSize    int
	cacheEnabled bool
}

// NewEnvelopeTransformer returns a transformer which implements a KEK-DEK based envelope encryption scheme.
// It uses envelopeService to encrypt and decrypt DEKs. Respective DEKs (in encrypted form) are prepended to
// the data items they encrypt. A cache (of size cacheSize) is maintained to store the most recently
// used decrypted DEKs in memory.
func NewEnvelopeTransformer(envelopeService Service, cacheSize int, baseTransformerFunc func(cipher.Block) (value.Transformer, error)) value.Transformer {
	var (
		cache *lru.Cache
	)

	if cacheSize > 0 {
		cache = lru.New(cacheSize)
	}
	return &envelopeTransformer{
		envelopeService:     envelopeService,
		transformers:        cache,
		baseTransformerFunc: baseTransformerFunc,
		cacheEnabled:        cacheSize > 0,
		cacheSize:           cacheSize,
	}
}

// TransformFromStorage decrypts data encrypted by this transformer using envelope encryption.
func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	metrics.RecordArrival(metrics.FromStorageLabel, time.Now())

	// Read the 16 bit length-of-DEK encoded at the start of the encrypted DEK. 16 bits can
	// represent a maximum key length of 65535 bytes. We are using a 256 bit key, whose
	// length cannot fit in 8 bits (1 byte). Thus, we use 16 bits (2 bytes) to store the length.
	var encKey cryptobyte.String
	s := cryptobyte.String(data)
	if ok := s.ReadUint16LengthPrefixed(&encKey); !ok {
		return nil, false, fmt.Errorf("invalid data encountered by envelope transformer: failed to read uint16 length prefixed data")
	}

	encData := []byte(s)

	// Look up the decrypted DEK from cache or Envelope.
	transformer := t.getTransformer(encKey)
	if transformer == nil {
		if t.cacheEnabled {
			value.RecordCacheMiss()
		}
		key, err := t.envelopeService.Decrypt(encKey)
		if err != nil {
			// Do NOT wrap this err using fmt.Errorf() or similar functions
			// because this gRPC status error has a useful error code when
			// recording the metric.
			return nil, false, err
		}

		transformer, err = t.addTransformer(encKey, key)
		if err != nil {
			return nil, false, err
		}
	}

	return transformer.TransformFromStorage(ctx, encData, dataCtx)
}

// TransformToStorage encrypts data to be written to disk using envelope encryption.
func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	metrics.RecordArrival(metrics.ToStorageLabel, time.Now())
	newKey, err := generateKey(32)
	if err != nil {
		return nil, err
	}

	encKey, err := t.envelopeService.Encrypt(newKey)
	if err != nil {
		// Do NOT wrap this err using fmt.Errorf() or similar functions
		// because this gRPC status error has a useful error code when
		// recording the metric.
		return nil, err
	}

	transformer, err := t.addTransformer(encKey, newKey)
	if err != nil {
		return nil, err
	}

	result, err := transformer.TransformToStorage(ctx, data, dataCtx)
	if err != nil {
		return nil, err
	}
	// Append the length of the encrypted DEK as the first 2 bytes.
	b := cryptobyte.NewBuilder(nil)
	b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
		b.AddBytes([]byte(encKey))
	})
	b.AddBytes(result)

	return b.Bytes()
}

var _ value.Transformer = &envelopeTransformer{}

// addTransformer inserts a new transformer to the Envelope cache of DEKs for future reads.
func (t *envelopeTransformer) addTransformer(encKey []byte, key []byte) (value.Transformer, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	transformer, err := t.baseTransformerFunc(block)
	if err != nil {
		return nil, err
	}

	// Use base64 of encKey as the key into the cache because hashicorp/golang-lru
	// cannot hash []uint8.
	if t.cacheEnabled {
		t.transformers.Add(base64.StdEncoding.EncodeToString(encKey), transformer)
		metrics.RecordDekCacheFillPercent(float64(t.transformers.Len()) / float64(t.cacheSize))
	}
	return transformer, nil
}

// getTransformer fetches the transformer corresponding to encKey from cache, if it exists.
func (t *envelopeTransformer) getTransformer(encKey []byte) value.Transformer {
	if !t.cacheEnabled {
		return nil
	}

	_transformer, found := t.transformers.Get(base64.StdEncoding.EncodeToString(encKey))
	if found {
		return _transformer.(value.Transformer)
	}
	return nil
}

// generateKey generates a random key using system randomness.
func generateKey(length int) (key []byte, err error) {
	defer func(start time.Time) {
		value.RecordDataKeyGeneration(start, err)
	}(time.Now())
	key = make([]byte, length)
	if _, err = rand.Read(key); err != nil {
		return nil, err
	}

	return key, nil
}
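The on-disk framing used above is [uint16 len(encKey)][encKey][AES-GCM output]. A self-contained sketch of that framing with cryptobyte, using placeholder byte values:

package main

import (
	"fmt"

	"golang.org/x/crypto/cryptobyte"
)

func main() {
	encKey := []byte("encrypted-DEK")   // placeholder for the KMS-wrapped DEK
	encData := []byte("sealed-payload") // placeholder for the AES-GCM output

	// write: length-prefix the DEK, then append the payload
	b := cryptobyte.NewBuilder(nil)
	b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) { b.AddBytes(encKey) })
	b.AddBytes(encData)
	stored, _ := b.Bytes()

	// read: peel the DEK off the front; the remainder is the payload
	var gotKey cryptobyte.String
	s := cryptobyte.String(stored)
	if !s.ReadUint16LengthPrefixed(&gotKey) {
		panic("malformed record")
	}
	fmt.Printf("dek=%s payload=%s\n", gotKey, []byte(s))
}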
162
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/grpc_service.go
generated
vendored
@ -1,162 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package envelope transforms values for storage at rest using an Envelope provider
package envelope

import (
	"context"
	"fmt"
	"net"
	"sync"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/klog/v2"
	kmsapi "k8s.io/kms/apis/v1beta1"
	"k8s.io/kms/pkg/util"
)

const (
	// unixProtocol is the only supported protocol for remote KMS provider.
	unixProtocol = "unix"
	// Current version for the protocol interface definition.
	kmsapiVersion = "v1beta1"

	versionErrorf = "KMS provider api version %s is not supported, only %s is supported now"
)

// The gRPC implementation for envelope.Service.
type gRPCService struct {
	kmsClient      kmsapi.KeyManagementServiceClient
	connection     *grpc.ClientConn
	callTimeout    time.Duration
	mux            sync.RWMutex
	versionChecked bool
}

// NewGRPCService returns an envelope.Service which uses gRPC to communicate with the remote KMS provider.
func NewGRPCService(ctx context.Context, endpoint string, callTimeout time.Duration) (Service, error) {
	klog.V(4).InfoS("Configure KMS provider", "endpoint", endpoint)

	addr, err := util.ParseEndpoint(endpoint)
	if err != nil {
		return nil, err
	}

	s := &gRPCService{callTimeout: callTimeout}
	s.connection, err = grpc.Dial(
		addr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithUnaryInterceptor(s.interceptor),
		grpc.WithDefaultCallOptions(grpc.WaitForReady(true)),
		grpc.WithContextDialer(
			func(context.Context, string) (net.Conn, error) {
				// Ignoring addr and timeout arguments:
				// addr - comes from the closure
				c, err := net.DialUnix(unixProtocol, nil, &net.UnixAddr{Name: addr})
				if err != nil {
					klog.ErrorS(err, "failed to create connection to unix socket", "addr", addr)
				} else {
					klog.V(4).InfoS("Successfully dialed Unix socket", "addr", addr)
				}
				return c, err
			}))

	if err != nil {
		return nil, fmt.Errorf("failed to create connection to %s, error: %v", endpoint, err)
	}

	s.kmsClient = kmsapi.NewKeyManagementServiceClient(s.connection)

	go func() {
		defer utilruntime.HandleCrash()

		<-ctx.Done()
		_ = s.connection.Close()
	}()

	return s, nil
}

func (g *gRPCService) checkAPIVersion(ctx context.Context) error {
	g.mux.Lock()
	defer g.mux.Unlock()

	if g.versionChecked {
		return nil
	}

	request := &kmsapi.VersionRequest{Version: kmsapiVersion}
	response, err := g.kmsClient.Version(ctx, request)
	if err != nil {
		return fmt.Errorf("failed get version from remote KMS provider: %v", err)
	}
	if response.Version != kmsapiVersion {
		return fmt.Errorf(versionErrorf, response.Version, kmsapiVersion)
	}
	g.versionChecked = true

	klog.V(4).InfoS("KMS provider api version verified", "version", response.Version)
	return nil
}

// Decrypt a given data string to obtain the original byte data.
func (g *gRPCService) Decrypt(cipher []byte) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), g.callTimeout)
	defer cancel()

	request := &kmsapi.DecryptRequest{Cipher: cipher, Version: kmsapiVersion}
	response, err := g.kmsClient.Decrypt(ctx, request)
	if err != nil {
		return nil, err
	}
	return response.Plain, nil
}

// Encrypt bytes to a string ciphertext.
func (g *gRPCService) Encrypt(plain []byte) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), g.callTimeout)
	defer cancel()

	request := &kmsapi.EncryptRequest{Plain: plain, Version: kmsapiVersion}
	response, err := g.kmsClient.Encrypt(ctx, request)
	if err != nil {
		return nil, err
	}
	return response.Cipher, nil
}

func (g *gRPCService) interceptor(
	ctx context.Context,
	method string,
	req interface{},
	reply interface{},
	cc *grpc.ClientConn,
	invoker grpc.UnaryInvoker,
	opts ...grpc.CallOption,
) error {
	if !kmsapi.IsVersionCheckMethod(method) {
		if err := g.checkAPIVersion(ctx); err != nil {
			return err
		}
	}

	return invoker(ctx, method, req, reply, cc, opts...)
}
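A hypothetical wiring sketch for the service above; the socket path and timeout are placeholders that depend on how the KMS plugin is deployed:

ctx, cancel := context.WithCancel(context.Background())
defer cancel() // cancelling the context closes the gRPC connection via the goroutine above

svc, err := envelope.NewGRPCService(ctx, "unix:///var/run/kmsplugin/socket.sock", 3*time.Second)
if err != nil {
	panic(err) // endpoint parsing or dial setup failed
}
// the v1beta1 API version is verified on the first RPC via the interceptor
ciphertext, err := svc.Encrypt([]byte("data-encryption-key"))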
112
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/cache.go
generated
vendored
@ -1,112 +0,0 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kmsv2 transforms values for storage at rest using an Envelope v2 provider
package kmsv2

import (
	"crypto/sha256"
	"hash"
	"sync"
	"time"
	"unsafe"

	utilcache "k8s.io/apimachinery/pkg/util/cache"
	"k8s.io/apiserver/pkg/storage/value"
	"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
	"k8s.io/utils/clock"
)

// simpleCache stores the decryption subset of value.Transformer (value.Read).
// this statically enforces that transformers placed in the cache are not used for encryption.
// this is relevant in the context of nonce collision since transformers that are created
// from encrypted DEKs retrieved from etcd cannot maintain their nonce counter state.
type simpleCache struct {
	cache *utilcache.Expiring
	ttl   time.Duration
	// hashPool is a per cache pool of hash.Hash (to avoid allocations from building the Hash)
	// SHA-256 is used to prevent collisions
	hashPool        *sync.Pool
	providerName    string
	mu              sync.Mutex                          // guards call to set
	recordCacheSize func(providerName string, size int) // for unit tests
}

func newSimpleCache(clock clock.Clock, ttl time.Duration, providerName string) *simpleCache {
	cache := utilcache.NewExpiringWithClock(clock)
	cache.AllowExpiredGet = true // for a given key, the value (the decryptTransformer) is always the same
	return &simpleCache{
		cache: cache,
		ttl:   ttl,
		hashPool: &sync.Pool{
			New: func() interface{} {
				return sha256.New()
			},
		},
		providerName:    providerName,
		recordCacheSize: metrics.RecordDekSourceCacheSize,
	}
}

// given a key, return the transformer, or nil if it does not exist in the cache
func (c *simpleCache) get(key []byte) value.Read {
	record, ok := c.cache.Get(c.keyFunc(key))
	if !ok {
		return nil
	}
	return record.(value.Read)
}

// set caches the record for the key
func (c *simpleCache) set(key []byte, transformer value.Read) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if len(key) == 0 {
		panic("key must not be empty")
	}
	if transformer == nil {
		panic("transformer must not be nil")
	}
	c.cache.Set(c.keyFunc(key), transformer, c.ttl)
	// Add metrics for cache size
	c.recordCacheSize(c.providerName, c.cache.Len())
}

// keyFunc generates a string key by hashing the inputs.
// This lowers the memory requirement of the cache.
func (c *simpleCache) keyFunc(s []byte) string {
	h := c.hashPool.Get().(hash.Hash)
	h.Reset()

	if _, err := h.Write(s); err != nil {
		panic(err) // Write() on hash never fails
	}
	key := toString(h.Sum(nil)) // skip base64 encoding to save an allocation
	c.hashPool.Put(h)

	return key
}

// toString performs unholy acts to avoid allocations
func toString(b []byte) string {
	// unsafe.SliceData relies on cap whereas we want to rely on len
	if len(b) == 0 {
		return ""
	}
	// Copied from go 1.20.1 strings.Builder.String
	// https://github.com/golang/go/blob/202a1a57064127c3f19d96df57b9f9586145e21c/src/strings/builder.go#L48
	return unsafe.String(unsafe.SliceData(b), len(b))
}
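A minimal sketch of the static guarantee the cache relies on, assuming value.Transformer is the union of value.Read and value.Write as in the value package; the constructor name is hypothetical:

var full value.Transformer = newDecryptedDEKTransformer() // hypothetical constructor
var readOnly value.Read = full                            // Transformer satisfies Read

plain, stale, err := readOnly.TransformFromStorage(ctx, data, dataCtx) // compiles
// readOnly.TransformToStorage(ctx, data, dataCtx) // does not compile:
// value.Read has no TransformToStorage, so cached entries can never encrypt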
528
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/envelope.go
generated
vendored
@ -1,528 +0,0 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package kmsv2 transforms values for storage at rest using a Envelope v2 provider
|
||||
package kmsv2
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"sort"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"golang.org/x/crypto/cryptobyte"
|
||||
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
"k8s.io/apimachinery/pkg/util/uuid"
|
||||
"k8s.io/apimachinery/pkg/util/validation"
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
|
||||
"k8s.io/apiserver/pkg/storage/value"
|
||||
aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
|
||||
kmstypes "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2"
|
||||
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
|
||||
"k8s.io/component-base/tracing"
|
||||
"k8s.io/klog/v2"
|
||||
kmsservice "k8s.io/kms/pkg/service"
|
||||
"k8s.io/utils/clock"
|
||||
)
|
||||
|
||||
func init() {
|
||||
value.RegisterMetrics()
|
||||
metrics.RegisterMetrics()
|
||||
}
|
||||
|
||||
const (
|
||||
// KMSAPIVersionv2 is a version of the KMS API.
|
||||
KMSAPIVersionv2 = "v2"
|
||||
// KMSAPIVersionv2beta1 is a version of the KMS API.
|
||||
KMSAPIVersionv2beta1 = "v2beta1"
|
||||
// annotationsMaxSize is the maximum size of the annotations.
|
||||
annotationsMaxSize = 32 * 1024 // 32 kB
|
||||
// KeyIDMaxSize is the maximum size of the keyID.
|
||||
KeyIDMaxSize = 1 * 1024 // 1 kB
|
||||
// encryptedDEKSourceMaxSize is the maximum size of the encrypted DEK source.
|
||||
encryptedDEKSourceMaxSize = 1 * 1024 // 1 kB
|
||||
// cacheTTL is the default time-to-live for the cache entry.
|
||||
// this allows the cache to grow to an infinite size for up to a day.
|
||||
// there is unlikely to be any meaningful memory impact on the server
|
||||
// because the cache will likely never have more than a few thousand entries.
|
||||
// each entry can be large due to an internal cache that maps the DEK seed to individual
|
||||
// DEK entries, but that cache has an aggressive TTL to keep the size under control.
|
||||
// with DEK/seed reuse and no storage migration, the number of entries in this cache
|
||||
// would be approximated by unique key IDs used by the KMS plugin
|
||||
// combined with the number of server restarts. If storage migration
|
||||
// is performed after key ID changes, and the number of restarts
|
||||
// is limited, this cache size may be as small as the number of API
|
||||
// servers in use (once old entries expire out from the TTL).
|
||||
cacheTTL = 24 * time.Hour
|
||||
// key ID related error codes for metrics
|
||||
errKeyIDOKCode ErrCodeKeyID = "ok"
|
||||
errKeyIDEmptyCode ErrCodeKeyID = "empty"
|
||||
errKeyIDTooLongCode ErrCodeKeyID = "too_long"
|
||||
)
|
||||
|
||||
// NowFunc is exported so tests can override it.
|
||||
var NowFunc = time.Now
|
||||
|
||||
type StateFunc func() (State, error)
|
||||
type ErrCodeKeyID string
|
||||
|
||||
type State struct {
|
||||
Transformer value.Transformer
|
||||
|
||||
EncryptedObject kmstypes.EncryptedObject
|
||||
|
||||
UID string
|
||||
|
||||
ExpirationTimestamp time.Time
|
||||
|
||||
// CacheKey is the key used to cache the DEK/seed in envelopeTransformer.cache.
|
||||
CacheKey []byte
|
||||
}
|
||||
|
||||
func (s *State) ValidateEncryptCapability() error {
|
||||
if now := NowFunc(); now.After(s.ExpirationTimestamp) {
|
||||
return fmt.Errorf("encryptedDEKSource with keyID hash %q expired at %s (current time is %s)",
|
||||
GetHashIfNotEmpty(s.EncryptedObject.KeyID), s.ExpirationTimestamp.Format(time.RFC3339), now.Format(time.RFC3339))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type envelopeTransformer struct {
|
||||
envelopeService kmsservice.Service
|
||||
providerName string
|
||||
stateFunc StateFunc
|
||||
|
||||
// cache is a thread-safe expiring lru cache which caches decrypted DEKs indexed by their encrypted form.
|
||||
cache *simpleCache
|
||||
apiServerID string
|
||||
}
|
||||
|
||||
// NewEnvelopeTransformer returns a transformer which implements a KEK-DEK based envelope encryption scheme.
|
||||
// It uses envelopeService to encrypt and decrypt DEKs. Respective DEKs (in encrypted form) are prepended to
|
||||
// the data items they encrypt.
|
||||
func NewEnvelopeTransformer(envelopeService kmsservice.Service, providerName string, stateFunc StateFunc, apiServerID string) value.Transformer {
|
||||
return newEnvelopeTransformerWithClock(envelopeService, providerName, stateFunc, apiServerID, cacheTTL, clock.RealClock{})
|
||||
}
|
||||
|
||||
func newEnvelopeTransformerWithClock(envelopeService kmsservice.Service, providerName string, stateFunc StateFunc, apiServerID string, cacheTTL time.Duration, clock clock.Clock) value.Transformer {
|
||||
return &envelopeTransformer{
|
||||
envelopeService: envelopeService,
|
||||
providerName: providerName,
|
||||
stateFunc: stateFunc,
|
||||
cache: newSimpleCache(clock, cacheTTL, providerName),
|
||||
apiServerID: apiServerID,
|
||||
}
|
||||
}
|
||||
|
||||
// TransformFromStorage decrypts data encrypted by this transformer using envelope encryption.
|
||||
func (t *envelopeTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
|
||||
ctx, span := tracing.Start(ctx, "TransformFromStorage with envelopeTransformer",
|
||||
attribute.String("transformer.provider.name", t.providerName),
|
||||
// The service.instance_id of the apiserver is already available in the trace
|
||||
/*
|
||||
{
|
||||
"key": "service.instance.id",
|
||||
"type": "string",
|
||||
"value": "apiserver-zsteyir5lyrtdcmqqmd5kzze6m"
|
||||
}
|
||||
*/
|
||||
)
|
||||
defer span.End(500 * time.Millisecond)
|
||||
|
||||
span.AddEvent("About to decode encrypted object")
|
||||
// Deserialize the EncryptedObject from the data.
|
||||
encryptedObject, err := t.doDecode(data)
|
||||
if err != nil {
|
||||
span.AddEvent("Decoding encrypted object failed")
|
||||
span.RecordError(err)
|
||||
return nil, false, err
|
||||
}
|
||||
span.AddEvent("Decoded encrypted object")
|
||||
|
||||
useSeed := encryptedObject.EncryptedDEKSourceType == kmstypes.EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED
|
||||
|
||||
// TODO: consider marking state.EncryptedDEK != encryptedObject.EncryptedDEK as a stale read to support DEK defragmentation
|
||||
// at a minimum we should have a metric that helps the user understand if DEK fragmentation is high
|
||||
state, err := t.stateFunc() // no need to call state.ValidateEncryptCapability on reads
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
encryptedObjectCacheKey, err := generateCacheKey(encryptedObject.EncryptedDEKSourceType, encryptedObject.EncryptedDEKSource, encryptedObject.KeyID, encryptedObject.Annotations)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
// Look up the decrypted DEK from cache first
|
||||
transformer := t.cache.get(encryptedObjectCacheKey)
|
||||
|
||||
// fallback to the envelope service if we do not have the transformer locally
|
||||
if transformer == nil {
|
||||
span.AddEvent("About to decrypt DEK using remote service")
|
||||
value.RecordCacheMiss()
|
||||
|
||||
requestInfo := getRequestInfoFromContext(ctx)
|
||||
uid := string(uuid.NewUUID())
|
||||
klog.V(6).InfoS("decrypting content using envelope service", "uid", uid, "key", string(dataCtx.AuthenticatedData()),
|
||||
"group", requestInfo.APIGroup, "version", requestInfo.APIVersion, "resource", requestInfo.Resource, "subresource", requestInfo.Subresource,
|
||||
"verb", requestInfo.Verb, "namespace", requestInfo.Namespace, "name", requestInfo.Name)
|
||||
|
||||
key, err := t.envelopeService.Decrypt(ctx, uid, &kmsservice.DecryptRequest{
|
||||
Ciphertext: encryptedObject.EncryptedDEKSource,
|
||||
KeyID: encryptedObject.KeyID,
|
||||
Annotations: encryptedObject.Annotations,
|
||||
})
|
||||
if err != nil {
|
||||
span.AddEvent("DEK decryption failed")
|
||||
span.RecordError(err)
|
||||
return nil, false, fmt.Errorf("failed to decrypt DEK, error: %w", err)
|
||||
}
|
||||
span.AddEvent("DEK decryption succeeded")
|
||||
|
||||
transformer, err = t.addTransformerForDecryption(encryptedObjectCacheKey, key, useSeed)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
}
|
||||
metrics.RecordKeyID(metrics.FromStorageLabel, t.providerName, encryptedObject.KeyID, t.apiServerID)
|
||||
|
||||
span.AddEvent("About to decrypt data using DEK")
|
||||
out, stale, err := transformer.TransformFromStorage(ctx, encryptedObject.EncryptedData, dataCtx)
|
||||
if err != nil {
|
||||
span.AddEvent("Data decryption failed")
|
||||
span.RecordError(err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
span.AddEvent("Data decryption succeeded")
|
||||
// data is considered stale if the key ID does not match our current write transformer
|
||||
return out,
|
||||
stale ||
|
||||
encryptedObject.KeyID != state.EncryptedObject.KeyID ||
|
||||
encryptedObject.EncryptedDEKSourceType != state.EncryptedObject.EncryptedDEKSourceType,
|
||||
nil
|
||||
}
|
||||
|
||||
// TransformToStorage encrypts data to be written to disk using envelope encryption.
|
||||
func (t *envelopeTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
|
||||
ctx, span := tracing.Start(ctx, "TransformToStorage with envelopeTransformer",
|
||||
attribute.String("transformer.provider.name", t.providerName),
|
||||
// The service.instance_id of the apiserver is already available in the trace
|
||||
/*
|
||||
{
|
||||
"key": "service.instance.id",
|
||||
"type": "string",
|
||||
"value": "apiserver-zsteyir5lyrtdcmqqmd5kzze6m"
|
||||
}
|
||||
*/
|
||||
)
|
||||
defer span.End(500 * time.Millisecond)
|
||||
|
||||
state, err := t.stateFunc()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := state.ValidateEncryptCapability(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// this prevents a cache miss every time the DEK rotates
|
||||
// this has the side benefit of causing the cache to perform a GC
|
||||
// TODO see if we can do this inside the stateFunc control loop
|
||||
t.cache.set(state.CacheKey, state.Transformer)
|
||||
|
||||
requestInfo := getRequestInfoFromContext(ctx)
|
||||
klog.V(6).InfoS("encrypting content using DEK", "uid", state.UID, "key", string(dataCtx.AuthenticatedData()),
|
||||
"group", requestInfo.APIGroup, "version", requestInfo.APIVersion, "resource", requestInfo.Resource, "subresource", requestInfo.Subresource,
|
||||
"verb", requestInfo.Verb, "namespace", requestInfo.Namespace, "name", requestInfo.Name)
|
||||
|
||||
span.AddEvent("About to encrypt data using DEK")
|
||||
result, err := state.Transformer.TransformToStorage(ctx, data, dataCtx)
|
||||
if err != nil {
|
||||
span.AddEvent("Data encryption failed")
|
||||
span.RecordError(err)
|
||||
return nil, err
|
||||
}
|
||||
span.AddEvent("Data encryption succeeded")
|
||||
|
||||
metrics.RecordKeyID(metrics.ToStorageLabel, t.providerName, state.EncryptedObject.KeyID, t.apiServerID)
|
||||
|
||||
encObjectCopy := state.EncryptedObject
|
||||
encObjectCopy.EncryptedData = result
|
||||
|
||||
span.AddEvent("About to encode encrypted object")
|
||||
// Serialize the EncryptedObject to a byte array.
|
||||
out, err := t.doEncode(&encObjectCopy)
|
||||
if err != nil {
|
||||
span.AddEvent("Encoding encrypted object failed")
|
||||
span.RecordError(err)
|
||||
return nil, err
|
||||
}
|
||||
span.AddEvent("Encoded encrypted object")
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// addTransformerForDecryption inserts a new transformer to the Envelope cache of DEKs for future reads.
func (t *envelopeTransformer) addTransformerForDecryption(cacheKey []byte, key []byte, useSeed bool) (value.Read, error) {
	var transformer value.Read
	var err error
	if useSeed {
		// the input key is considered safe to use here because it is coming from the KMS plugin / etcd
		transformer, err = aestransformer.NewHKDFExtendedNonceGCMTransformer(key)
	} else {
		var block cipher.Block
		block, err = aes.NewCipher(key)
		if err != nil {
			return nil, err
		}
		// this is compatible with NewGCMTransformerWithUniqueKeyUnsafe for decryption
		// it would use random nonces for encryption but we never do that
		transformer, err = aestransformer.NewGCMTransformer(block)
	}
	if err != nil {
		return nil, err
	}
	t.cache.set(cacheKey, transformer)
	return transformer, nil
}

// doEncode encodes the EncryptedObject to a byte array.
func (t *envelopeTransformer) doEncode(request *kmstypes.EncryptedObject) ([]byte, error) {
	if err := ValidateEncryptedObject(request); err != nil {
		return nil, err
	}
	return proto.Marshal(request)
}

// doDecode decodes the byte array to an EncryptedObject.
func (t *envelopeTransformer) doDecode(originalData []byte) (*kmstypes.EncryptedObject, error) {
	o := &kmstypes.EncryptedObject{}
	if err := proto.Unmarshal(originalData, o); err != nil {
		return nil, err
	}
	if err := ValidateEncryptedObject(o); err != nil {
		return nil, err
	}

	return o, nil
}

// GenerateTransformer generates a new transformer and encrypts the DEK/seed using the envelope service.
// It returns the transformer, the encrypted DEK/seed, cache key and error.
func GenerateTransformer(ctx context.Context, uid string, envelopeService kmsservice.Service, useSeed bool) (value.Transformer, *kmstypes.EncryptedObject, []byte, error) {
	newTransformerFunc := func() (value.Transformer, []byte, error) {
		seed, err := aestransformer.GenerateKey(aestransformer.MinSeedSizeExtendedNonceGCM)
		if err != nil {
			return nil, nil, err
		}
		transformer, err := aestransformer.NewHKDFExtendedNonceGCMTransformer(seed)
		if err != nil {
			return nil, nil, err
		}
		return transformer, seed, nil
	}
	if !useSeed {
		newTransformerFunc = aestransformer.NewGCMTransformerWithUniqueKeyUnsafe
	}
	transformer, newKey, err := newTransformerFunc()
	if err != nil {
		return nil, nil, nil, err
	}

	klog.V(6).InfoS("encrypting content using envelope service", "uid", uid)

	resp, err := envelopeService.Encrypt(ctx, uid, newKey)
	if err != nil {
		return nil, nil, nil, fmt.Errorf("failed to encrypt DEK, error: %w", err)
	}

	o := &kmstypes.EncryptedObject{
		KeyID:              resp.KeyID,
		EncryptedDEKSource: resp.Ciphertext,
		EncryptedData:      []byte{0}, // any non-empty value to pass validation
		Annotations:        resp.Annotations,
	}

	if useSeed {
		o.EncryptedDEKSourceType = kmstypes.EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED
	} else {
		o.EncryptedDEKSourceType = kmstypes.EncryptedDEKSourceType_AES_GCM_KEY
	}

	if err := ValidateEncryptedObject(o); err != nil {
		return nil, nil, nil, err
	}

	cacheKey, err := generateCacheKey(o.EncryptedDEKSourceType, resp.Ciphertext, resp.KeyID, resp.Annotations)
	if err != nil {
		return nil, nil, nil, err
	}

	o.EncryptedData = nil // make sure that later code that uses this encrypted object sets this field

	return transformer, o, cacheKey, nil
}

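// exampleGenerateAndSeal is an illustrative sketch added by the editor, not part
// of the original file. It shows the intended call sequence around
// GenerateTransformer: generate a DEK/seed, encrypt the payload with the local
// transformer, then serialize the sealed EncryptedObject the same way doEncode
// does. `svc`, `payload` and `dataCtx` are assumed inputs.
func exampleGenerateAndSeal(ctx context.Context, svc kmsservice.Service, payload []byte, dataCtx value.Context) ([]byte, error) {
	transformer, encObject, _, err := GenerateTransformer(ctx, string(uuid.NewUUID()), svc, true /* useSeed */)
	if err != nil {
		return nil, err // the remote Encrypt call failed or returned an invalid object
	}
	// GenerateTransformer clears EncryptedData; the caller must fill it in.
	encObject.EncryptedData, err = transformer.TransformToStorage(ctx, payload, dataCtx)
	if err != nil {
		return nil, err
	}
	return proto.Marshal(encObject)
}
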
func ValidateEncryptedObject(o *kmstypes.EncryptedObject) error {
	if o == nil {
		return fmt.Errorf("encrypted object is nil")
	}
	switch t := o.EncryptedDEKSourceType; t {
	case kmstypes.EncryptedDEKSourceType_AES_GCM_KEY:
	case kmstypes.EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED:
	default:
		return fmt.Errorf("unknown encryptedDEKSourceType: %d", t)
	}
	if len(o.EncryptedData) == 0 {
		return fmt.Errorf("encrypted data is empty")
	}
	if err := validateEncryptedDEKSource(o.EncryptedDEKSource); err != nil {
		return fmt.Errorf("failed to validate encrypted DEK source: %w", err)
	}
	if _, err := ValidateKeyID(o.KeyID); err != nil {
		return fmt.Errorf("failed to validate key id: %w", err)
	}
	if err := validateAnnotations(o.Annotations); err != nil {
		return fmt.Errorf("failed to validate annotations: %w", err)
	}
	return nil
}

// validateEncryptedDEKSource tests the following:
// 1. The encrypted DEK source is not empty.
// 2. The size of encrypted DEK source is less than 1 kB.
func validateEncryptedDEKSource(encryptedDEKSource []byte) error {
	if len(encryptedDEKSource) == 0 {
		return fmt.Errorf("encrypted DEK source is empty")
	}
	if len(encryptedDEKSource) > encryptedDEKSourceMaxSize {
		return fmt.Errorf("encrypted DEK source is %d bytes, which exceeds the max size of %d", len(encryptedDEKSource), encryptedDEKSourceMaxSize)
	}
	return nil
}

// validateAnnotations tests the following:
// 1. Each annotation key is a fully qualified domain name.
// 2. The total size of annotation keys + values is less than 32 kB.
func validateAnnotations(annotations map[string][]byte) error {
	var errs []error
	var totalSize uint64
	for k, v := range annotations {
		if fieldErr := validation.IsFullyQualifiedDomainName(field.NewPath("annotations"), k); fieldErr != nil {
			errs = append(errs, fieldErr.ToAggregate())
		}
		totalSize += uint64(len(k)) + uint64(len(v))
	}
	if totalSize > annotationsMaxSize {
		errs = append(errs, fmt.Errorf("total size of annotations is %d, which exceeds the max size of %d", totalSize, annotationsMaxSize))
	}
	return utilerrors.NewAggregate(errs)
}

// ValidateKeyID tests the following:
// 1. The keyID is not empty.
// 2. The size of keyID is less than 1 kB.
func ValidateKeyID(keyID string) (ErrCodeKeyID, error) {
	if len(keyID) == 0 {
		return errKeyIDEmptyCode, fmt.Errorf("keyID is empty")
	}
	if len(keyID) > KeyIDMaxSize {
		return errKeyIDTooLongCode, fmt.Errorf("keyID is %d bytes, which exceeds the max size of %d", len(keyID), KeyIDMaxSize)
	}
	return errKeyIDOKCode, nil
}

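// exampleCheckStatusKeyID is an illustrative sketch added by the editor, not part
// of the original file. Callers typically pair the returned ErrCodeKeyID with the
// invalid-keyID metric; the string conversion below assumes ErrCodeKeyID has a
// string underlying type, as its usage in this package suggests.
func exampleCheckStatusKeyID(providerName, keyID string) error {
	errCode, err := ValidateKeyID(keyID)
	if err != nil {
		metrics.RecordInvalidKeyIDFromStatus(providerName, string(errCode))
		return err
	}
	return nil
}
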
func getRequestInfoFromContext(ctx context.Context) *genericapirequest.RequestInfo {
	if reqInfo, found := genericapirequest.RequestInfoFrom(ctx); found {
		return reqInfo
	}
	return &genericapirequest.RequestInfo{}
}

// generateCacheKey returns a key for the cache.
// The key is a concatenation of:
//  0. encryptedDEKSourceType
//  1. encryptedDEKSource
//  2. keyID
//  3. length of annotations
//  4. annotations (sorted by key) - each annotation is a concatenation of:
//     a. annotation key
//     b. annotation value
func generateCacheKey(encryptedDEKSourceType kmstypes.EncryptedDEKSourceType, encryptedDEKSource []byte, keyID string, annotations map[string][]byte) ([]byte, error) {
	// TODO(aramase): use sync pool buffer to avoid allocations
	b := cryptobyte.NewBuilder(nil)
	b.AddUint32(uint32(encryptedDEKSourceType))
	b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
		b.AddBytes(encryptedDEKSource)
	})
	b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
		b.AddBytes(toBytes(keyID))
	})
	if len(annotations) == 0 {
		return b.Bytes()
	}

	// add the length of annotations to the cache key
	b.AddUint32(uint32(len(annotations)))

	// Sort the annotations by key.
	keys := make([]string, 0, len(annotations))
	for k := range annotations {
		k := k
		keys = append(keys, k)
	}
	sort.Strings(keys)
	for _, k := range keys {
		// The maximum size of annotations is annotationsMaxSize (32 kB) so we can safely
		// assume that the length of the key and value will fit in a uint16.
		b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
			b.AddBytes(toBytes(k))
		})
		b.AddUint16LengthPrefixed(func(b *cryptobyte.Builder) {
			b.AddBytes(annotations[k])
		})
	}

	return b.Bytes()
}

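// exampleParseCacheKey is an illustrative sketch added by the editor, not part of
// the original file. The layout produced by generateCacheKey can be walked back
// with cryptobyte.String, which shows how the length-prefixed fields line up.
// This only demonstrates the format; nothing in the package parses cache keys.
func exampleParseCacheKey(cacheKey []byte) (uint32, []byte, []byte, bool) {
	s := cryptobyte.String(cacheKey)
	var sourceType uint32
	var dekSource, keyID cryptobyte.String
	ok := s.ReadUint32(&sourceType) &&
		s.ReadUint16LengthPrefixed(&dekSource) &&
		s.ReadUint16LengthPrefixed(&keyID)
	return sourceType, []byte(dekSource), []byte(keyID), ok
}
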
// toBytes performs unholy acts to avoid allocations
func toBytes(s string) []byte {
	// unsafe.StringData is unspecified for the empty string, so we provide a strict interpretation
	if len(s) == 0 {
		return nil
	}
	// Copied from go 1.20.1 os.File.WriteString
	// https://github.com/golang/go/blob/202a1a57064127c3f19d96df57b9f9586145e21c/src/os/file.go#L246
	return unsafe.Slice(unsafe.StringData(s), len(s))
}

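// Editor's note (illustrative, not part of the original file): the slice returned
// by toBytes aliases the string's backing array, so callers must treat it as
// read-only; writing through it is undefined behavior since strings may live in
// read-only memory. cryptobyte.Builder only reads from the slices handed to
// AddBytes, which is why this zero-copy conversion is safe in generateCacheKey.
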
// GetHashIfNotEmpty returns the sha256 hash of the data if it is not empty.
func GetHashIfNotEmpty(data string) string {
	if len(data) > 0 {
		return fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(data)))
	}
	return ""
}
154
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/grpc_service.go
generated
vendored
@ -1,154 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kmsv2 transforms values for storage at rest using an Envelope provider
package kmsv2

import (
	"context"
	"fmt"
	"net"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
	"k8s.io/klog/v2"
	kmsapi "k8s.io/kms/apis/v2"
	kmsservice "k8s.io/kms/pkg/service"
	"k8s.io/kms/pkg/util"
)

const (
	// unixProtocol is the only supported protocol for remote KMS provider.
	unixProtocol = "unix"
)

// The gRPC implementation for envelope.Service.
type gRPCService struct {
	kmsClient   kmsapi.KeyManagementServiceClient
	connection  *grpc.ClientConn
	callTimeout time.Duration
}

// NewGRPCService returns an envelope.Service which uses gRPC to communicate with the remote KMS provider.
func NewGRPCService(ctx context.Context, endpoint, providerName string, callTimeout time.Duration) (kmsservice.Service, error) {
	klog.V(4).InfoS("Configure KMS provider", "endpoint", endpoint)

	addr, err := util.ParseEndpoint(endpoint)
	if err != nil {
		return nil, err
	}

	s := &gRPCService{callTimeout: callTimeout}
	s.connection, err = grpc.Dial(
		addr,
		grpc.WithAuthority("localhost"),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultCallOptions(grpc.WaitForReady(true)),
		grpc.WithContextDialer(
			func(context.Context, string) (net.Conn, error) {
				// Ignoring addr and timeout arguments:
				// addr - comes from the closure
				c, err := net.DialUnix(unixProtocol, nil, &net.UnixAddr{Name: addr})
				if err != nil {
					klog.ErrorS(err, "failed to create connection to unix socket", "addr", addr)
				} else {
					klog.V(4).InfoS("Successfully dialed Unix socket", "addr", addr)
				}
				return c, err
			}),
		grpc.WithChainUnaryInterceptor(recordMetricsInterceptor(providerName)),
	)

	if err != nil {
		return nil, fmt.Errorf("failed to create connection to %s, error: %v", endpoint, err)
	}

	s.kmsClient = kmsapi.NewKeyManagementServiceClient(s.connection)

	go func() {
		defer utilruntime.HandleCrash()

		<-ctx.Done()
		_ = s.connection.Close()
	}()

	return s, nil
}

// Decrypt a given data string to obtain the original byte data.
func (g *gRPCService) Decrypt(ctx context.Context, uid string, req *kmsservice.DecryptRequest) ([]byte, error) {
	ctx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	request := &kmsapi.DecryptRequest{
		Ciphertext:  req.Ciphertext,
		Uid:         uid,
		KeyId:       req.KeyID,
		Annotations: req.Annotations,
	}
	response, err := g.kmsClient.Decrypt(ctx, request)
	if err != nil {
		return nil, err
	}
	return response.Plaintext, nil
}

// Encrypt bytes to a string ciphertext.
func (g *gRPCService) Encrypt(ctx context.Context, uid string, plaintext []byte) (*kmsservice.EncryptResponse, error) {
	ctx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	request := &kmsapi.EncryptRequest{
		Plaintext: plaintext,
		Uid:       uid,
	}
	response, err := g.kmsClient.Encrypt(ctx, request)
	if err != nil {
		return nil, err
	}
	return &kmsservice.EncryptResponse{
		Ciphertext:  response.Ciphertext,
		KeyID:       response.KeyId,
		Annotations: response.Annotations,
	}, nil
}

// Status returns the status of the KMSv2 provider.
func (g *gRPCService) Status(ctx context.Context) (*kmsservice.StatusResponse, error) {
	ctx, cancel := context.WithTimeout(ctx, g.callTimeout)
	defer cancel()

	request := &kmsapi.StatusRequest{}
	response, err := g.kmsClient.Status(ctx, request)
	if err != nil {
		return nil, err
	}
	return &kmsservice.StatusResponse{Version: response.Version, Healthz: response.Healthz, KeyID: response.KeyId}, nil
}

func recordMetricsInterceptor(providerName string) grpc.UnaryClientInterceptor {
	return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		start := NowFunc()
		respErr := invoker(ctx, method, req, reply, cc, opts...)
		elapsed := NowFunc().Sub(start)
		metrics.RecordKMSOperationLatency(providerName, method, elapsed, respErr)
		return respErr
	}
}
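// exampleDialKMS is an illustrative sketch added by the editor, not part of the
// original file. It constructs the service against a plugin's unix socket and
// probes it with Status. The socket path and provider name are placeholders;
// real callers wire these from the encryption configuration.
func exampleDialKMS(ctx context.Context) error {
	svc, err := NewGRPCService(ctx, "unix:///var/run/kms.sock", "example-provider", 3*time.Second)
	if err != nil {
		return err
	}
	status, err := svc.Status(ctx)
	if err != nil {
		return err
	}
	klog.InfoS("kms plugin status", "version", status.Version, "healthz", status.Healthz)
	return nil
}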
9
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2/OWNERS
generated
vendored
@ -1,9 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

# Disable inheritance as this is an api owners file
options:
  no_parent_owners: true
approvers:
  - api-approvers
reviewers:
  - sig-auth-api-reviewers
186
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2/api.pb.go
generated
vendored
@ -1,186 +0,0 @@
/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: api.proto

package v2

import (
	fmt "fmt"
	proto "github.com/gogo/protobuf/proto"
	math "math"
)

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf

// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package

type EncryptedDEKSourceType int32

const (
	// AES_GCM_KEY means that the plaintext of encryptedDEKSource is the DEK itself, with AES-GCM as the encryption algorithm.
	EncryptedDEKSourceType_AES_GCM_KEY EncryptedDEKSourceType = 0
	// HKDF_SHA256_XNONCE_AES_GCM_SEED means that the plaintext of encryptedDEKSource is the pseudo random key
	// (referred to as the seed throughout the code) that is fed into HKDF expand. SHA256 is the hash algorithm
	// and first 32 bytes of encryptedData are the info param. The first 32 bytes from the HKDF stream are used
	// as the DEK with AES-GCM as the encryption algorithm.
	EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED EncryptedDEKSourceType = 1
)

var EncryptedDEKSourceType_name = map[int32]string{
	0: "AES_GCM_KEY",
	1: "HKDF_SHA256_XNONCE_AES_GCM_SEED",
}

var EncryptedDEKSourceType_value = map[string]int32{
	"AES_GCM_KEY":                     0,
	"HKDF_SHA256_XNONCE_AES_GCM_SEED": 1,
}

func (x EncryptedDEKSourceType) String() string {
	return proto.EnumName(EncryptedDEKSourceType_name, int32(x))
}

func (EncryptedDEKSourceType) EnumDescriptor() ([]byte, []int) {
	return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}

// EncryptedObject is the representation of data stored in etcd after envelope encryption.
type EncryptedObject struct {
	// EncryptedData is the encrypted data.
	EncryptedData []byte `protobuf:"bytes,1,opt,name=encryptedData,proto3" json:"encryptedData,omitempty"`
	// KeyID is the KMS key ID used for encryption operations.
	// keyID must satisfy the following constraints:
	// 1. The keyID is not empty.
	// 2. The size of keyID is less than 1 kB.
	KeyID string `protobuf:"bytes,2,opt,name=keyID,proto3" json:"keyID,omitempty"`
	// EncryptedDEKSource is the ciphertext of the source of the DEK used to encrypt the data stored in encryptedData.
	// encryptedDEKSourceType defines the process of using the plaintext of this field to determine the aforementioned DEK.
	// encryptedDEKSource must satisfy the following constraints:
	// 1. The encrypted DEK source is not empty.
	// 2. The size of encrypted DEK source is less than 1 kB.
	EncryptedDEKSource []byte `protobuf:"bytes,3,opt,name=encryptedDEKSource,proto3" json:"encryptedDEKSource,omitempty"`
	// Annotations is additional metadata that was provided by the KMS plugin.
	// Annotations must satisfy the following constraints:
	// 1. Annotation key must be a fully qualified domain name that conforms to the definition in DNS (RFC 1123).
	// 2. The size of annotations keys + values is less than 32 kB.
	Annotations map[string][]byte `protobuf:"bytes,4,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
	// encryptedDEKSourceType defines the process of using the plaintext of encryptedDEKSource to determine the DEK.
	EncryptedDEKSourceType EncryptedDEKSourceType `protobuf:"varint,5,opt,name=encryptedDEKSourceType,proto3,enum=v2.EncryptedDEKSourceType" json:"encryptedDEKSourceType,omitempty"`
	XXX_NoUnkeyedLiteral   struct{}               `json:"-"`
	XXX_unrecognized       []byte                 `json:"-"`
	XXX_sizecache          int32                  `json:"-"`
}

func (m *EncryptedObject) Reset()         { *m = EncryptedObject{} }
func (m *EncryptedObject) String() string { return proto.CompactTextString(m) }
func (*EncryptedObject) ProtoMessage()    {}
func (*EncryptedObject) Descriptor() ([]byte, []int) {
	return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}
func (m *EncryptedObject) XXX_Unmarshal(b []byte) error {
	return xxx_messageInfo_EncryptedObject.Unmarshal(m, b)
}
func (m *EncryptedObject) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
	return xxx_messageInfo_EncryptedObject.Marshal(b, m, deterministic)
}
func (m *EncryptedObject) XXX_Merge(src proto.Message) {
	xxx_messageInfo_EncryptedObject.Merge(m, src)
}
func (m *EncryptedObject) XXX_Size() int {
	return xxx_messageInfo_EncryptedObject.Size(m)
}
func (m *EncryptedObject) XXX_DiscardUnknown() {
	xxx_messageInfo_EncryptedObject.DiscardUnknown(m)
}

var xxx_messageInfo_EncryptedObject proto.InternalMessageInfo

func (m *EncryptedObject) GetEncryptedData() []byte {
	if m != nil {
		return m.EncryptedData
	}
	return nil
}

func (m *EncryptedObject) GetKeyID() string {
	if m != nil {
		return m.KeyID
	}
	return ""
}

func (m *EncryptedObject) GetEncryptedDEKSource() []byte {
	if m != nil {
		return m.EncryptedDEKSource
	}
	return nil
}

func (m *EncryptedObject) GetAnnotations() map[string][]byte {
	if m != nil {
		return m.Annotations
	}
	return nil
}

func (m *EncryptedObject) GetEncryptedDEKSourceType() EncryptedDEKSourceType {
	if m != nil {
		return m.EncryptedDEKSourceType
	}
	return EncryptedDEKSourceType_AES_GCM_KEY
}

func init() {
	proto.RegisterEnum("v2.EncryptedDEKSourceType", EncryptedDEKSourceType_name, EncryptedDEKSourceType_value)
	proto.RegisterType((*EncryptedObject)(nil), "v2.EncryptedObject")
	proto.RegisterMapType((map[string][]byte)(nil), "v2.EncryptedObject.AnnotationsEntry")
}

func init() { proto.RegisterFile("api.proto", fileDescriptor_00212fb1f9d3bf1c) }

var fileDescriptor_00212fb1f9d3bf1c = []byte{
	// 329 bytes of a gzipped FileDescriptorProto
	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x74, 0x91, 0xe1, 0x4b, 0xc2, 0x40,
	0x18, 0xc6, 0xdb, 0xcc, 0xc0, 0xd3, 0x72, 0x1c, 0x21, 0xc3, 0x2f, 0x8d, 0xf2, 0xc3, 0xe8, 0xc3,
	0x0e, 0x16, 0x85, 0x44, 0x08, 0xe6, 0xce, 0x0c, 0x49, 0x61, 0xeb, 0x43, 0xf5, 0x65, 0x9c, 0xf6,
	0x22, 0x6b, 0xb6, 0x1b, 0xb7, 0xf3, 0x60, 0x7f, 0x6a, 0xff, 0x4d, 0x38, 0x13, 0xd3, 0xec, 0xdb,
	0xbd, 0xef, 0xfd, 0xde, 0xe7, 0xb9, 0x7b, 0x5e, 0x54, 0x61, 0x69, 0xe4, 0xa4, 0x82, 0x4b, 0x8e,
	0x75, 0xe5, 0x9e, 0x7f, 0xe9, 0xa8, 0x4e, 0x93, 0xa9, 0xc8, 0x53, 0x09, 0xef, 0xe3, 0xc9, 0x07,
	0x4c, 0x25, 0x6e, 0xa1, 0x63, 0x58, 0xb7, 0x3c, 0x26, 0x99, 0xa9, 0x59, 0x9a, 0x5d, 0xf3, 0xb7,
	0x9b, 0xf8, 0x14, 0x95, 0x63, 0xc8, 0x1f, 0x3d, 0x53, 0xb7, 0x34, 0xbb, 0xe2, 0xaf, 0x0a, 0xec,
	0x20, 0xbc, 0xc1, 0xe8, 0x30, 0xe0, 0x0b, 0x31, 0x05, 0xb3, 0x54, 0x08, 0xec, 0xb9, 0xc1, 0x7d,
	0x54, 0x65, 0x49, 0xc2, 0x25, 0x93, 0x11, 0x4f, 0x32, 0xf3, 0xd0, 0x2a, 0xd9, 0x55, 0xb7, 0xe5,
	0x28, 0xd7, 0xd9, 0x79, 0x95, 0xd3, 0xdd, 0x60, 0x34, 0x91, 0x22, 0xf7, 0x7f, 0x0f, 0x62, 0x1f,
	0x35, 0xfe, 0xaa, 0x3f, 0xe7, 0x29, 0x98, 0x65, 0x4b, 0xb3, 0x4f, 0xdc, 0xe6, 0x96, 0xe4, 0x16,
	0xe1, 0xff, 0x33, 0xd9, 0xec, 0x20, 0x63, 0xd7, 0x14, 0x1b, 0xa8, 0x14, 0x43, 0x5e, 0x24, 0x52,
	0xf1, 0x97, 0xc7, 0x65, 0x0e, 0x8a, 0xcd, 0x17, 0x50, 0xe4, 0x50, 0xf3, 0x57, 0xc5, 0xad, 0xde,
	0xd6, 0x2e, 0x47, 0xa8, 0xb1, 0xdf, 0x11, 0xd7, 0x51, 0xb5, 0x4b, 0x83, 0xf0, 0xa1, 0xf7, 0x14,
	0x0e, 0xe9, 0xab, 0x71, 0x80, 0x2f, 0xd0, 0xd9, 0x60, 0xe8, 0xf5, 0xc3, 0x60, 0xd0, 0x75, 0xaf,
	0x6f, 0xc2, 0x97, 0xd1, 0x78, 0xd4, 0xa3, 0xe1, 0x9a, 0x09, 0x28, 0xf5, 0x0c, 0xed, 0xbe, 0xf3,
	0x76, 0x17, 0xb7, 0x33, 0x27, 0xe2, 0x84, 0xa5, 0x51, 0x06, 0x42, 0x81, 0x20, 0x69, 0x3c, 0x23,
	0x99, 0xe4, 0x82, 0xcd, 0x80, 0x14, 0xce, 0xe4, 0xe7, 0x33, 0x04, 0x12, 0x05, 0x73, 0x9e, 0x02,
	0x89, 0x3f, 0x33, 0xe5, 0x12, 0xe5, 0x4e, 0x8e, 0x8a, 0xb5, 0x5f, 0x7d, 0x07, 0x00, 0x00, 0xff,
	0xff, 0xcc, 0x0f, 0x2b, 0x2e, 0x03, 0x02, 0x00, 0x00,
}
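// exampleRoundTrip is an illustrative sketch added by the editor, not part of
// the original generated file. The generated type round-trips through the gogo
// proto runtime; this mirrors what doEncode and doDecode in the kmsv2 package do.
func exampleRoundTrip() error {
	in := &EncryptedObject{
		KeyID:              "key-1",
		EncryptedData:      []byte("ciphertext"),
		EncryptedDEKSource: []byte("wrapped-dek"),
	}
	raw, err := proto.Marshal(in)
	if err != nil {
		return err
	}
	out := &EncryptedObject{}
	if err := proto.Unmarshal(raw, out); err != nil {
		return err
	}
	if out.GetKeyID() != in.GetKeyID() {
		return fmt.Errorf("round trip mismatch: %q != %q", out.GetKeyID(), in.GetKeyID())
	}
	return nil
}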
60
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2/api.proto
generated
vendored
@ -1,60 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// To regenerate api.pb.go run `hack/update-codegen.sh protobindings`
syntax = "proto3";

package v2;
option go_package = "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2";

// EncryptedObject is the representation of data stored in etcd after envelope encryption.
message EncryptedObject {
  // EncryptedData is the encrypted data.
  bytes encryptedData = 1;

  // KeyID is the KMS key ID used for encryption operations.
  // keyID must satisfy the following constraints:
  // 1. The keyID is not empty.
  // 2. The size of keyID is less than 1 kB.
  string keyID = 2;

  // EncryptedDEKSource is the ciphertext of the source of the DEK used to encrypt the data stored in encryptedData.
  // encryptedDEKSourceType defines the process of using the plaintext of this field to determine the aforementioned DEK.
  // encryptedDEKSource must satisfy the following constraints:
  // 1. The encrypted DEK source is not empty.
  // 2. The size of encrypted DEK source is less than 1 kB.
  bytes encryptedDEKSource = 3;

  // Annotations is additional metadata that was provided by the KMS plugin.
  // Annotations must satisfy the following constraints:
  // 1. Annotation key must be a fully qualified domain name that conforms to the definition in DNS (RFC 1123).
  // 2. The size of annotations keys + values is less than 32 kB.
  map<string, bytes> annotations = 4;

  // encryptedDEKSourceType defines the process of using the plaintext of encryptedDEKSource to determine the DEK.
  EncryptedDEKSourceType encryptedDEKSourceType = 5;
}

enum EncryptedDEKSourceType {
  // AES_GCM_KEY means that the plaintext of encryptedDEKSource is the DEK itself, with AES-GCM as the encryption algorithm.
  AES_GCM_KEY = 0;

  // HKDF_SHA256_XNONCE_AES_GCM_SEED means that the plaintext of encryptedDEKSource is the pseudo random key
  // (referred to as the seed throughout the code) that is fed into HKDF expand. SHA256 is the hash algorithm
  // and first 32 bytes of encryptedData are the info param. The first 32 bytes from the HKDF stream are used
  // as the DEK with AES-GCM as the encryption algorithm.
  HKDF_SHA256_XNONCE_AES_GCM_SEED = 1;
}
18
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2/v2.go
generated
vendored
@ -1,18 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package v2 contains definition of kms-plugin's serialized types.
package v2
322
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics/metrics.go
generated
vendored
@ -1,322 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"crypto/sha256"
	"errors"
	"fmt"
	"hash"
	"sync"
	"time"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/klog/v2"
	"k8s.io/utils/lru"
)

const (
	namespace        = "apiserver"
	subsystem        = "envelope_encryption"
	FromStorageLabel = "from_storage"
	ToStorageLabel   = "to_storage"
)

type metricLabels struct {
	transformationType string
	providerName       string
	keyIDHash          string
	apiServerIDHash    string
}

/*
 * By default, all the following metrics are defined as falling under
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	lockLastFromStorage   sync.Mutex
	lockLastToStorage     sync.Mutex
	lockRecordKeyID       sync.Mutex
	lockRecordKeyIDStatus sync.Mutex

	lastFromStorage                                 time.Time
	lastToStorage                                   time.Time
	keyIDHashTotalMetricLabels                      *lru.Cache
	keyIDHashStatusLastTimestampSecondsMetricLabels *lru.Cache
	cacheSize                                       = 100

	// This metric is only used for KMS v1 API.
	dekCacheFillPercent = metrics.NewGauge(
		&metrics.GaugeOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "dek_cache_fill_percent",
			Help:           "Percent of the cache slots currently occupied by cached DEKs.",
			StabilityLevel: metrics.ALPHA,
		},
	)

	// This metric is only used for KMS v1 API.
	dekCacheInterArrivals = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "dek_cache_inter_arrival_time_seconds",
			Help:           "Time (in seconds) of inter arrival of transformation requests.",
			StabilityLevel: metrics.ALPHA,
			Buckets:        metrics.ExponentialBuckets(60, 2, 10),
		},
		[]string{"transformation_type"},
	)

	// These metrics are made public to be used by unit tests.
	KMSOperationsLatencyMetric = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "kms_operations_latency_seconds",
			Help:           "KMS operation duration with gRPC error code status total.",
			StabilityLevel: metrics.ALPHA,
			// Use custom buckets to avoid the default buckets which are too small for KMS operations.
			// Start 0.1ms with the last bucket being [~52s, +Inf)
			Buckets: metrics.ExponentialBuckets(0.0001, 2, 20),
		},
		[]string{"provider_name", "method_name", "grpc_status_code"},
	)

	// KeyIDHashTotal is the number of times a keyID is used
	// e.g. apiserver_envelope_encryption_key_id_hash_total counter
	// apiserver_envelope_encryption_key_id_hash_total{apiserver_id_hash="sha256",key_id_hash="sha256",
	// provider_name="providerName",transformation_type="from_storage"} 1
	KeyIDHashTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "key_id_hash_total",
			Help:           "Number of times a keyID is used split by transformation type, provider, and apiserver identity.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"transformation_type", "provider_name", "key_id_hash", "apiserver_id_hash"},
	)

	// KeyIDHashLastTimestampSeconds is the last time in seconds when a keyID was used
	// e.g. apiserver_envelope_encryption_key_id_hash_last_timestamp_seconds{apiserver_id_hash="sha256",key_id_hash="sha256", provider_name="providerName",transformation_type="from_storage"} 1.674865558833728e+09
	KeyIDHashLastTimestampSeconds = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "key_id_hash_last_timestamp_seconds",
			Help:           "The last time in seconds when a keyID was used.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"transformation_type", "provider_name", "key_id_hash", "apiserver_id_hash"},
	)

	// KeyIDHashStatusLastTimestampSeconds is the last time in seconds when a keyID was returned by the Status RPC call.
	// e.g. apiserver_envelope_encryption_key_id_hash_status_last_timestamp_seconds{apiserver_id_hash="sha256",key_id_hash="sha256", provider_name="providerName"} 1.674865558833728e+09
	KeyIDHashStatusLastTimestampSeconds = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "key_id_hash_status_last_timestamp_seconds",
			Help:           "The last time in seconds when a keyID was returned by the Status RPC call.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"provider_name", "key_id_hash", "apiserver_id_hash"},
	)

	InvalidKeyIDFromStatusTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "invalid_key_id_from_status_total",
			Help:           "Number of times an invalid keyID is returned by the Status RPC call split by error.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"provider_name", "error"},
	)

	DekSourceCacheSize = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Namespace:      namespace,
			Subsystem:      subsystem,
			Name:           "dek_source_cache_size",
			Help:           "Number of records in data encryption key (DEK) source cache. On a restart, this value is an approximation of the number of decrypt RPC calls the server will make to the KMS plugin.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"provider_name"},
	)
)

var registerMetricsFunc sync.Once
var hashPool *sync.Pool

func registerLRUMetrics() {
	if keyIDHashTotalMetricLabels != nil {
		keyIDHashTotalMetricLabels.Clear()
	}
	if keyIDHashStatusLastTimestampSecondsMetricLabels != nil {
		keyIDHashStatusLastTimestampSecondsMetricLabels.Clear()
	}

	keyIDHashTotalMetricLabels = lru.NewWithEvictionFunc(cacheSize, func(key lru.Key, _ interface{}) {
		item := key.(metricLabels)
		if deleted := KeyIDHashTotal.DeleteLabelValues(item.transformationType, item.providerName, item.keyIDHash, item.apiServerIDHash); deleted {
			klog.InfoS("Deleted keyIDHashTotalMetricLabels", "transformationType", item.transformationType,
				"providerName", item.providerName, "keyIDHash", item.keyIDHash, "apiServerIDHash", item.apiServerIDHash)
		}
		if deleted := KeyIDHashLastTimestampSeconds.DeleteLabelValues(item.transformationType, item.providerName, item.keyIDHash, item.apiServerIDHash); deleted {
			klog.InfoS("Deleted keyIDHashLastTimestampSecondsMetricLabels", "transformationType", item.transformationType,
				"providerName", item.providerName, "keyIDHash", item.keyIDHash, "apiServerIDHash", item.apiServerIDHash)
		}
	})
	keyIDHashStatusLastTimestampSecondsMetricLabels = lru.NewWithEvictionFunc(cacheSize, func(key lru.Key, _ interface{}) {
		item := key.(metricLabels)
		if deleted := KeyIDHashStatusLastTimestampSeconds.DeleteLabelValues(item.providerName, item.keyIDHash, item.apiServerIDHash); deleted {
			klog.InfoS("Deleted keyIDHashStatusLastTimestampSecondsMetricLabels", "providerName", item.providerName, "keyIDHash", item.keyIDHash, "apiServerIDHash", item.apiServerIDHash)
		}
	})
}

func RegisterMetrics() {
	registerMetricsFunc.Do(func() {
		registerLRUMetrics()
		hashPool = &sync.Pool{
			New: func() interface{} {
				return sha256.New()
			},
		}
		legacyregistry.MustRegister(dekCacheFillPercent)
		legacyregistry.MustRegister(dekCacheInterArrivals)
		legacyregistry.MustRegister(DekSourceCacheSize)
		legacyregistry.MustRegister(KeyIDHashTotal)
		legacyregistry.MustRegister(KeyIDHashLastTimestampSeconds)
		legacyregistry.MustRegister(KeyIDHashStatusLastTimestampSeconds)
		legacyregistry.MustRegister(InvalidKeyIDFromStatusTotal)
		legacyregistry.MustRegister(KMSOperationsLatencyMetric)
	})
}

// RecordKeyID records total count and last time in seconds when a KeyID was used for TransformFromStorage and TransformToStorage operations
func RecordKeyID(transformationType, providerName, keyID, apiServerID string) {
	lockRecordKeyID.Lock()
	defer lockRecordKeyID.Unlock()

	keyIDHash, apiServerIDHash := addLabelToCache(keyIDHashTotalMetricLabels, transformationType, providerName, keyID, apiServerID)
	KeyIDHashTotal.WithLabelValues(transformationType, providerName, keyIDHash, apiServerIDHash).Inc()
	KeyIDHashLastTimestampSeconds.WithLabelValues(transformationType, providerName, keyIDHash, apiServerIDHash).SetToCurrentTime()
}

// RecordKeyIDFromStatus records last time in seconds when a KeyID was returned by the Status RPC call.
func RecordKeyIDFromStatus(providerName, keyID, apiServerID string) {
	lockRecordKeyIDStatus.Lock()
	defer lockRecordKeyIDStatus.Unlock()

	keyIDHash, apiServerIDHash := addLabelToCache(keyIDHashStatusLastTimestampSecondsMetricLabels, "", providerName, keyID, apiServerID)
	KeyIDHashStatusLastTimestampSeconds.WithLabelValues(providerName, keyIDHash, apiServerIDHash).SetToCurrentTime()
}

func RecordInvalidKeyIDFromStatus(providerName, errCode string) {
	InvalidKeyIDFromStatusTotal.WithLabelValues(providerName, errCode).Inc()
}

func RecordArrival(transformationType string, start time.Time) {
	switch transformationType {
	case FromStorageLabel:
		lockLastFromStorage.Lock()
		defer lockLastFromStorage.Unlock()

		if lastFromStorage.IsZero() {
			lastFromStorage = start
		}
		dekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastFromStorage).Seconds())
		lastFromStorage = start
	case ToStorageLabel:
		lockLastToStorage.Lock()
		defer lockLastToStorage.Unlock()

		if lastToStorage.IsZero() {
			lastToStorage = start
		}
		dekCacheInterArrivals.WithLabelValues(transformationType).Observe(start.Sub(lastToStorage).Seconds())
		lastToStorage = start
	}
}

func RecordDekCacheFillPercent(percent float64) {
	dekCacheFillPercent.Set(percent)
}

func RecordDekSourceCacheSize(providerName string, size int) {
	DekSourceCacheSize.WithLabelValues(providerName).Set(float64(size))
}

// RecordKMSOperationLatency records the latency of a KMS operation.
func RecordKMSOperationLatency(providerName, methodName string, duration time.Duration, err error) {
	KMSOperationsLatencyMetric.WithLabelValues(providerName, methodName, getErrorCode(err)).Observe(duration.Seconds())
}

type gRPCError interface {
	GRPCStatus() *status.Status
}

func getErrorCode(err error) string {
	if err == nil {
		return codes.OK.String()
	}

	// handle errors wrapped with fmt.Errorf and similar
	var s gRPCError
	if errors.As(err, &s) {
		return s.GRPCStatus().Code().String()
	}

	// This is not a gRPC error. The operation must have failed before the gRPC
	// method was called, otherwise we would get a gRPC error.
	return "unknown-non-grpc"
}

func getHash(data string) string {
	if len(data) == 0 {
		return ""
	}
	h := hashPool.Get().(hash.Hash)
	h.Reset()
	h.Write([]byte(data))
	dataHash := fmt.Sprintf("sha256:%x", h.Sum(nil))
	hashPool.Put(h)
	return dataHash
}

func addLabelToCache(c *lru.Cache, transformationType, providerName, keyID, apiServerID string) (string, string) {
	keyIDHash := getHash(keyID)
	apiServerIDHash := getHash(apiServerID)
	c.Add(metricLabels{
		transformationType: transformationType,
		providerName:       providerName,
		keyIDHash:          keyIDHash,
		apiServerIDHash:    apiServerIDHash,
	}, nil) // value is irrelevant, this is a set and not a map
	return keyIDHash, apiServerIDHash
}
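// exampleRecordLatency is an illustrative sketch added by the editor, not part
// of the original file. Metrics must be registered once before any Record*
// helper is called; the provider and method names below are placeholders.
func exampleRecordLatency() {
	RegisterMetrics()
	start := time.Now()
	// ... perform the KMS gRPC call here ...
	RecordKMSOperationLatency("example-provider", "/v2.KeyManagementService/Encrypt", time.Since(start), nil)
}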
57
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/identity/identity.go
generated
vendored
@ -1,57 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package identity

import (
	"bytes"
	"context"
	"fmt"

	"k8s.io/apiserver/pkg/storage/value"
)

var (
	transformer      = identityTransformer{}
	encryptedPrefix  = []byte("k8s:enc:")
	errEncryptedData = fmt.Errorf("identity transformer tried to read encrypted data")
)

// identityTransformer performs no transformation on provided data, but validates
// that the data is not encrypted data during TransformFromStorage
type identityTransformer struct{}

// NewEncryptCheckTransformer returns an identityTransformer which returns an error
// on attempts to read encrypted data
func NewEncryptCheckTransformer() value.Transformer {
	return transformer
}

// TransformFromStorage returns the input bytes if the data is not encrypted
func (identityTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	// identityTransformer has to return an error if the data is encoded using another transformer.
	// JSON data starts with '{'. Protobuf data has a prefix 'k8s[\x00-\xFF]'.
	// Prefix 'k8s:enc:' is reserved for encrypted data on disk.
	if bytes.HasPrefix(data, encryptedPrefix) {
		return nil, false, errEncryptedData
	}
	return data, false, nil
}

// TransformToStorage implements the Transformer interface for identityTransformer
func (identityTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	return data, nil
}
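// exampleEncryptCheck is an illustrative sketch added by the editor, not part of
// the original file. Reads of plain data pass through unchanged, while anything
// carrying the reserved "k8s:enc:" prefix is rejected. value.DefaultContext is
// assumed to be the usual byte-slice Context helper from the value package.
func exampleEncryptCheck(ctx context.Context) error {
	t := NewEncryptCheckTransformer()
	dataCtx := value.DefaultContext([]byte("/registry/secrets/default/s1"))
	if _, _, err := t.TransformFromStorage(ctx, []byte(`{"kind":"Secret"}`), dataCtx); err != nil {
		return err // plain JSON should pass through
	}
	_, _, err := t.TransformFromStorage(ctx, []byte("k8s:enc:kms:v2:provider:..."), dataCtx)
	return err // expected: errEncryptedData
}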
70
e2e/vendor/k8s.io/apiserver/pkg/storage/value/encrypt/secretbox/secretbox.go
generated
vendored
@ -1,70 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package secretbox transforms values for storage at rest using XSalsa20 and Poly1305.
package secretbox

import (
	"context"
	"crypto/rand"
	"fmt"

	"golang.org/x/crypto/nacl/secretbox"

	"k8s.io/apiserver/pkg/storage/value"
)

// secretbox implements at rest encryption of the provided values given a 32 byte secret key.
// Uses a standard 24 byte nonce (placed at the beginning of the cipher text) generated
// from crypto/rand. Does not perform authentication of the data at rest.
type secretboxTransformer struct {
	key [32]byte
}

const nonceSize = 24

// NewSecretboxTransformer takes the given key and performs encryption and decryption on the given
// data.
func NewSecretboxTransformer(key [32]byte) value.Transformer {
	return &secretboxTransformer{key: key}
}

func (t *secretboxTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
	if len(data) < (secretbox.Overhead + nonceSize) {
		return nil, false, fmt.Errorf("the stored data was shorter than the required size")
	}
	var nonce [nonceSize]byte
	copy(nonce[:], data[:nonceSize])
	data = data[nonceSize:]
	out := make([]byte, 0, len(data)-secretbox.Overhead)
	result, ok := secretbox.Open(out, data, &nonce, &t.key)
	if !ok {
		return nil, false, fmt.Errorf("output array was not large enough for encryption")
	}
	return result, false, nil
}

func (t *secretboxTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
	var nonce [nonceSize]byte
	n, err := rand.Read(nonce[:])
	if err != nil {
		return nil, err
	}
	if n != nonceSize {
		return nil, fmt.Errorf("unable to read sufficient random bytes")
	}
	return secretbox.Seal(nonce[:], data, &nonce, &t.key), nil
}
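// exampleSecretboxRoundTrip is an illustrative sketch added by the editor, not
// part of the original file: a seal/open round trip. The key would normally come
// from the encryption configuration, not be left zeroed like this placeholder,
// and value.DefaultContext is assumed to be the byte-slice Context helper.
func exampleSecretboxRoundTrip(ctx context.Context) error {
	var key [32]byte // placeholder key; use real key material in practice
	t := NewSecretboxTransformer(key)
	dataCtx := value.DefaultContext(nil)
	sealed, err := t.TransformToStorage(ctx, []byte("hello"), dataCtx)
	if err != nil {
		return err
	}
	opened, _, err := t.TransformFromStorage(ctx, sealed, dataCtx)
	if err != nil {
		return err
	}
	if string(opened) != "hello" {
		return fmt.Errorf("round trip mismatch")
	}
	return nil
}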
162
e2e/vendor/k8s.io/apiserver/pkg/storage/value/metrics.go
generated
vendored
@ -1,162 +0,0 @@
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package value
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
const (
|
||||
namespace = "apiserver"
|
||||
subsystem = "storage"
|
||||
)
|
||||
|
||||
/*
|
||||
* By default, all the following metrics are defined as falling under
|
||||
* ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
|
||||
*
|
||||
* Promoting the stability level of the metric is a responsibility of the component owner, since it
|
||||
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
|
||||
* the metric stability policy.
|
||||
*/
|
||||
var (
|
||||
transformerLatencies = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "transformation_duration_seconds",
|
||||
Help: "Latencies in seconds of value transformation operations.",
|
||||
// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
|
||||
// external KMS is involved latencies may climb into hundreds of milliseconds.
|
||||
Buckets: metrics.ExponentialBuckets(5e-6, 2, 25),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"transformation_type", "transformer_prefix"},
|
||||
)
|
||||
|
||||
transformerOperationsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "transformation_operations_total",
|
||||
Help: "Total number of transformations. Successful transformation will have a status 'OK' and a varied status string when the transformation fails. The status, resource, and transformation_type fields can be used for alerting purposes. For example, you can monitor for encryption/decryption failures using the transformation_type (e.g., from_storage for decryption and to_storage for encryption). Additionally, these fields can be used to ensure that the correct transformers are applied to each resource.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"resource", "transformation_type", "transformer_prefix", "status"},
|
||||
)
|
||||
|
||||
envelopeTransformationCacheMissTotal = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "envelope_transformation_cache_misses_total",
|
||||
Help: "Total number of cache misses while accessing key decryption key(KEK).",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
|
||||
dataKeyGenerationLatencies = metrics.NewHistogram(
|
||||
&metrics.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "data_key_generation_duration_seconds",
|
||||
Help: "Latencies in seconds of data encryption key(DEK) generation operations.",
|
||||
Buckets: metrics.ExponentialBuckets(5e-6, 2, 14),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
|
||||
dataKeyGenerationFailuresTotal = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "data_key_generation_failures_total",
|
||||
Help: "Total number of failed data encryption key(DEK) generation operations.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
func RegisterMetrics() {
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(transformerLatencies)
|
||||
legacyregistry.MustRegister(transformerOperationsTotal)
|
||||
legacyregistry.MustRegister(envelopeTransformationCacheMissTotal)
|
||||
legacyregistry.MustRegister(dataKeyGenerationLatencies)
|
||||
legacyregistry.MustRegister(dataKeyGenerationFailuresTotal)
|
||||
})
|
||||
}

// RecordTransformation records latencies and count of TransformFromStorage and TransformToStorage operations.
// Note that the transformation_failures_total metric is deprecated; use transformation_operations_total instead.
func RecordTransformation(resource, transformationType, transformerPrefix string, elapsed time.Duration, err error) {
	transformerOperationsTotal.WithLabelValues(resource, transformationType, transformerPrefix, getErrorCode(err)).Inc()

	if err == nil {
		transformerLatencies.WithLabelValues(transformationType, transformerPrefix).Observe(elapsed.Seconds())
	}
}
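
// Editor's sketch (not part of the upstream file): callers time the operation
// themselves and pass the elapsed duration along with any error. The resource,
// prefix, and doTransform helper below are hypothetical:
//
//	start := time.Now()
//	out, err := doTransform(data) // hypothetical transformation
//	RecordTransformation("secrets", "to_storage", "k8s:enc:aescbc:v1:", time.Since(start), err)
//
// Note that the err == nil guard above means latency is observed only for
// successful operations; failures move the operations counter alone.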

// RecordCacheMiss records a miss on the Key Encryption Key (KEK); a call to KMS was required to decrypt the KEK.
func RecordCacheMiss() {
	envelopeTransformationCacheMissTotal.Inc()
}

// RecordDataKeyGeneration records latencies and count of Data Encryption Key generation operations.
func RecordDataKeyGeneration(start time.Time, err error) {
	if err != nil {
		dataKeyGenerationFailuresTotal.Inc()
		return
	}

	dataKeyGenerationLatencies.Observe(sinceInSeconds(start))
}
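
// Editor's sketch (not part of the upstream file): RecordDataKeyGeneration
// takes the start time rather than an elapsed duration, so the natural call
// pattern is the following, where generateDEK is a hypothetical helper:
//
//	start := time.Now()
//	dek, err := generateDEK(32)
//	RecordDataKeyGeneration(start, err)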

// sinceInSeconds gets the time since the specified start in seconds.
func sinceInSeconds(start time.Time) float64 {
	return time.Since(start).Seconds()
}

type gRPCError interface {
	GRPCStatus() *status.Status
}

func getErrorCode(err error) string {
	if err == nil {
		return codes.OK.String()
	}

	// handle errors wrapped with fmt.Errorf and similar
	var s gRPCError
	if errors.As(err, &s) {
		return s.GRPCStatus().Code().String()
	}

	// This is not a gRPC error. The operation must have failed before the gRPC
	// method was called, otherwise we would get a gRPC error.
	return "unknown-non-grpc"
}
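
// Editor's sketch (not part of the upstream file): getErrorCode sees through
// fmt.Errorf wrapping because errors.As unwraps until it finds a value
// implementing GRPCStatus(). Assuming the usual grpc status and codes
// packages plus io for a non-gRPC error:
//
//	base := status.Error(codes.Unavailable, "kms-plugin is unavailable")
//	wrapped := fmt.Errorf("failed to decrypt DEK: %w", base)
//	getErrorCode(wrapped) // "Unavailable"
//	getErrorCode(nil)     // "OK"
//	getErrorCode(io.EOF)  // "unknown-non-grpc"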

221
e2e/vendor/k8s.io/apiserver/pkg/storage/value/transformer.go
generated
vendored
@@ -1,221 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package value contains methods for assisting with transformation of values in storage.
package value

import (
	"bytes"
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/errors"
	genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
	"k8s.io/klog/v2"
)

func init() {
	RegisterMetrics()
}

// Context is additional information that a storage transformation may need to verify the data at rest.
type Context interface {
	// AuthenticatedData should return an array of bytes that describes the current value. If the value changes,
	// the transformer may report the value as unreadable or tampered. This may be nil if no such description exists
	// or is needed. For additional verification, set this to data that strongly identifies the value, such as
	// the key and creation version of the stored data.
	AuthenticatedData() []byte
}

type Read interface {
	// TransformFromStorage may transform the provided data from its underlying storage representation or return an error.
	// Stale is true if the object on disk is stale and a write to etcd should be issued, even if the contents of the object
	// have not changed.
	TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, stale bool, err error)
}

type Write interface {
	// TransformToStorage may transform the provided data into the appropriate form in storage or return an error.
	TransformToStorage(ctx context.Context, data []byte, dataCtx Context) (out []byte, err error)
}

// Transformer allows a value to be transformed before being read from or written to the underlying store. The methods
// must be able to undo the transformation caused by the other.
type Transformer interface {
	Read
	Write
}
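
// Editor's sketch (not part of the upstream file): a minimal no-op
// implementation satisfying Transformer; the type name is hypothetical and
// mirrors the identity behavior referenced elsewhere in this package:
//
//	type identityTransformer struct{}
//
//	func (identityTransformer) TransformFromStorage(_ context.Context, data []byte, _ Context) ([]byte, bool, error) {
//		return data, false, nil
//	}
//
//	func (identityTransformer) TransformToStorage(_ context.Context, data []byte, _ Context) ([]byte, error) {
//		return data, nil
//	}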

// ResourceTransformers returns a transformer for the provided resource.
type ResourceTransformers interface {
	TransformerForResource(resource schema.GroupResource) Transformer
}

// DefaultContext is a simple implementation of Context for a slice of bytes.
type DefaultContext []byte

// AuthenticatedData returns itself.
func (c DefaultContext) AuthenticatedData() []byte { return c }

// PrefixTransformer holds a transformer interface and the prefix that the transformation is located under.
type PrefixTransformer struct {
	Prefix      []byte
	Transformer Transformer
}

type prefixTransformers struct {
	transformers []PrefixTransformer
	err          error
}

var _ Transformer = &prefixTransformers{}

// NewPrefixTransformers supports the Transformer interface by checking the incoming data against the provided
// prefixes in order. The first matching prefix will be used to transform the value (the prefix is stripped
// before the Transformer interface is invoked). The first provided transformer will be used when writing to
// the store.
func NewPrefixTransformers(err error, transformers ...PrefixTransformer) Transformer {
	if err == nil {
		err = fmt.Errorf("the provided value does not match any of the supported transformers")
	}
	return &prefixTransformers{
		transformers: transformers,
		err:          err,
	}
}
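
// Editor's sketch (not part of the upstream file): wiring two entries so that
// writes always use the first transformer while reads fall back to plaintext;
// aesTransformer is hypothetical, and an empty Prefix matches any data:
//
//	t := NewPrefixTransformers(nil,
//		PrefixTransformer{Prefix: []byte("k8s:enc:aescbc:v1:key1:"), Transformer: aesTransformer},
//		PrefixTransformer{Prefix: []byte{}, Transformer: identityTransformer{}},
//	)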

// TransformFromStorage finds the first transformer with a prefix matching the provided data and returns
// the result of transforming the value. It will always mark any transformation as stale that is not using
// the first transformer.
func (t *prefixTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, bool, error) {
	start := time.Now()
	var errs []error
	resource := getResourceFromContext(ctx)
	for i, transformer := range t.transformers {
		if bytes.HasPrefix(data, transformer.Prefix) {
			result, stale, err := transformer.Transformer.TransformFromStorage(ctx, data[len(transformer.Prefix):], dataCtx)
			// To migrate away from encryption, a user can specify an identity transformer higher up
			// (in the config file) than the encryption transformer. In that scenario, the identity transformer needs to
			// identify (during reads from disk) whether the data being read is encrypted or not. If the data is encrypted,
			// it will return an error, but that error should not prevent the next transformer from being tried.
			if len(transformer.Prefix) == 0 && err != nil {
				continue
			}
			if len(transformer.Prefix) == 0 {
				RecordTransformation(resource, "from_storage", "identity", time.Since(start), err)
			} else {
				RecordTransformation(resource, "from_storage", string(transformer.Prefix), time.Since(start), err)
			}

			// It is valid to have overlapping prefixes when the same encryption provider
			// is specified multiple times but with different keys (the first provider is
			// being rotated to and some later provider is being rotated away from).
			//
			// Example:
			//
			//  {
			//    "aescbc": {
			//      "keys": [
			//        {
			//          "name": "2",
			//          "secret": "some key 2"
			//        }
			//      ]
			//    }
			//  },
			//  {
			//    "aescbc": {
			//      "keys": [
			//        {
			//          "name": "1",
			//          "secret": "some key 1"
			//        }
			//      ]
			//    }
			//  },
			//
			// The transformers for both aescbc configs share the prefix k8s:enc:aescbc:v1:
			// but a failure in the first one should not prevent a later match from being attempted.
			// Thus we never short-circuit on a prefix match that results in an error.
			if err != nil {
				errs = append(errs, err)
				continue
			}

			return result, stale || i != 0, err
		}
	}
	if err := errors.Reduce(errors.NewAggregate(errs)); err != nil {
		logTransformErr(ctx, err, "failed to decrypt data")
		return nil, false, err
	}
	RecordTransformation(resource, "from_storage", "unknown", time.Since(start), t.err)
	return nil, false, t.err
}
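
// Editor's note (not part of the upstream file): the stale || i != 0 return
// value above is what drives storage migration: any read served by a
// transformer other than the first is reported stale, prompting the caller to
// rewrite the object with the current (first) provider. Continuing the sketch
// after NewPrefixTransformers, a plaintext read served by the identity entry
// at index 1 comes back stale (ctx and dataCtx are assumed to be in scope):
//
//	out, stale, err := t.TransformFromStorage(ctx, []byte("plaintext"), dataCtx)
//	// out == []byte("plaintext"), stale == true, err == nil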

// TransformToStorage uses the first transformer and adds its prefix to the data.
func (t *prefixTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx Context) ([]byte, error) {
	start := time.Now()
	transformer := t.transformers[0]
	resource := getResourceFromContext(ctx)
	result, err := transformer.Transformer.TransformToStorage(ctx, data, dataCtx)
	RecordTransformation(resource, "to_storage", string(transformer.Prefix), time.Since(start), err)
	if err != nil {
		logTransformErr(ctx, err, "failed to encrypt data")
		return nil, err
	}
	prefixedData := make([]byte, len(transformer.Prefix), len(result)+len(transformer.Prefix))
	copy(prefixedData, transformer.Prefix)
	prefixedData = append(prefixedData, result...)
	return prefixedData, nil
}
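
// Editor's note (not part of the upstream file): the make/copy/append sequence
// above allocates the output exactly once, with length len(Prefix) and
// capacity len(Prefix)+len(result), so the final append never reallocates.
// An equivalent single-allocation sketch:
//
//	prefixedData := append(make([]byte, 0, len(transformer.Prefix)+len(result)), transformer.Prefix...)
//	prefixedData = append(prefixedData, result...)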

func logTransformErr(ctx context.Context, err error, message string) {
	requestInfo := getRequestInfoFromContext(ctx)
	if klogLevel6 := klog.V(6); klogLevel6.Enabled() {
		klogLevel6.InfoSDepth(
			1,
			message,
			"err", err,
			"group", requestInfo.APIGroup,
			"version", requestInfo.APIVersion,
			"resource", requestInfo.Resource,
			"subresource", requestInfo.Subresource,
			"verb", requestInfo.Verb,
			"namespace", requestInfo.Namespace,
			"name", requestInfo.Name,
		)

		return
	}

	klog.ErrorSDepth(1, err, message)
}

func getRequestInfoFromContext(ctx context.Context) *genericapirequest.RequestInfo {
	if reqInfo, found := genericapirequest.RequestInfoFrom(ctx); found {
		return reqInfo
	}
	klog.V(4).InfoSDepth(1, "no request info on context")
	return &genericapirequest.RequestInfo{}
}

func getResourceFromContext(ctx context.Context) string {
	reqInfo := getRequestInfoFromContext(ctx)
	return schema.GroupResource{Group: reqInfo.APIGroup, Resource: reqInfo.Resource}.String()
}