rebase: bump k8s.io/kubernetes from 1.26.2 to 1.27.2

Bumps [k8s.io/kubernetes](https://github.com/kubernetes/kubernetes) from 1.26.2 to 1.27.2.
- [Release notes](https://github.com/kubernetes/kubernetes/releases)
- [Commits](https://github.com/kubernetes/kubernetes/compare/v1.26.2...v1.27.2)

---
updated-dependencies:
- dependency-name: k8s.io/kubernetes
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2023-05-29 21:03:29 +00:00
committed by mergify[bot]
parent 0e79135419
commit 07b05616a0
1072 changed files with 208716 additions and 198880 deletions

100
vendor/k8s.io/apiserver/pkg/util/apihelpers/helpers.go generated vendored Normal file
View File

@ -0,0 +1,100 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apihelpers
import (
"sort"
flowcontrol "k8s.io/api/flowcontrol/v1beta3"
)
// SetFlowSchemaCondition sets conditions.
func SetFlowSchemaCondition(flowSchema *flowcontrol.FlowSchema, newCondition flowcontrol.FlowSchemaCondition) {
existingCondition := GetFlowSchemaConditionByType(flowSchema, newCondition.Type)
if existingCondition == nil {
flowSchema.Status.Conditions = append(flowSchema.Status.Conditions, newCondition)
return
}
if existingCondition.Status != newCondition.Status {
existingCondition.Status = newCondition.Status
existingCondition.LastTransitionTime = newCondition.LastTransitionTime
}
existingCondition.Reason = newCondition.Reason
existingCondition.Message = newCondition.Message
}
// GetFlowSchemaConditionByType gets conditions.
func GetFlowSchemaConditionByType(flowSchema *flowcontrol.FlowSchema, conditionType flowcontrol.FlowSchemaConditionType) *flowcontrol.FlowSchemaCondition {
for i := range flowSchema.Status.Conditions {
if flowSchema.Status.Conditions[i].Type == conditionType {
return &flowSchema.Status.Conditions[i]
}
}
return nil
}
// SetPriorityLevelConfigurationCondition sets conditions.
func SetPriorityLevelConfigurationCondition(priorityLevel *flowcontrol.PriorityLevelConfiguration, newCondition flowcontrol.PriorityLevelConfigurationCondition) {
existingCondition := GetPriorityLevelConfigurationConditionByType(priorityLevel, newCondition.Type)
if existingCondition == nil {
priorityLevel.Status.Conditions = append(priorityLevel.Status.Conditions, newCondition)
return
}
if existingCondition.Status != newCondition.Status {
existingCondition.Status = newCondition.Status
existingCondition.LastTransitionTime = newCondition.LastTransitionTime
}
existingCondition.Reason = newCondition.Reason
existingCondition.Message = newCondition.Message
}
// GetPriorityLevelConfigurationConditionByType gets conditions.
func GetPriorityLevelConfigurationConditionByType(priorityLevel *flowcontrol.PriorityLevelConfiguration, conditionType flowcontrol.PriorityLevelConfigurationConditionType) *flowcontrol.PriorityLevelConfigurationCondition {
for i := range priorityLevel.Status.Conditions {
if priorityLevel.Status.Conditions[i].Type == conditionType {
return &priorityLevel.Status.Conditions[i]
}
}
return nil
}
var _ sort.Interface = FlowSchemaSequence{}
// FlowSchemaSequence holds sorted set of pointers to FlowSchema objects.
// FlowSchemaSequence implements `sort.Interface`
type FlowSchemaSequence []*flowcontrol.FlowSchema
func (s FlowSchemaSequence) Len() int {
return len(s)
}
func (s FlowSchemaSequence) Less(i, j int) bool {
// the flow-schema w/ lower matching-precedence is prior
if ip, jp := s[i].Spec.MatchingPrecedence, s[j].Spec.MatchingPrecedence; ip != jp {
return ip < jp
}
// sort alphabetically
return s[i].Name < s[j].Name
}
func (s FlowSchemaSequence) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}

22
vendor/k8s.io/apiserver/pkg/util/dryrun/dryrun.go generated vendored Normal file
View File

@ -0,0 +1,22 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dryrun
// IsDryRun returns true if the DryRun flag is an actual dry-run.
func IsDryRun(flag []string) bool {
return len(flag) > 0
}

15
vendor/k8s.io/apiserver/pkg/util/flowcontrol/OWNERS generated vendored Normal file
View File

@ -0,0 +1,15 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- lavalamp
- deads2k
- yue9944882
- MikeSpreitzer
reviewers:
- lavalamp
- deads2k
- yue9944882
- MikeSpreitzer
labels:
- sig/api-machinery
- area/apiserver

View File

@ -0,0 +1,93 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"context"
"sync"
)
type priorityAndFairnessKeyType int
const (
// priorityAndFairnessInitializationSignalKey is a key under which
// initialization signal function for watch requests is stored
// in the context.
priorityAndFairnessInitializationSignalKey priorityAndFairnessKeyType = iota
)
// WithInitializationSignal creates a copy of parent context with
// priority and fairness initialization signal value.
func WithInitializationSignal(ctx context.Context, signal InitializationSignal) context.Context {
return context.WithValue(ctx, priorityAndFairnessInitializationSignalKey, signal)
}
// initializationSignalFrom returns an initialization signal function
// which when called signals that watch initialization has already finished
// to priority and fairness dispatcher.
func initializationSignalFrom(ctx context.Context) (InitializationSignal, bool) {
signal, ok := ctx.Value(priorityAndFairnessInitializationSignalKey).(InitializationSignal)
return signal, ok && signal != nil
}
// WatchInitialized sends a signal to priority and fairness dispatcher
// that a given watch request has already been initialized.
func WatchInitialized(ctx context.Context) {
if signal, ok := initializationSignalFrom(ctx); ok {
signal.Signal()
}
}
// RequestDelegated informs the priority and fairness dispatcher that
// a given request has been delegated to an aggregated API
// server. No-op when priority and fairness is disabled.
func RequestDelegated(ctx context.Context) {
// The watch initialization signal doesn't traverse request
// boundaries, so we generously fire it as soon as we know
// that the request won't be serviced locally. Safe to call
// for non-watch requests.
WatchInitialized(ctx)
}
// InitializationSignal is an interface that allows sending and handling
// initialization signals.
type InitializationSignal interface {
// Signal notifies the dispatcher about finished initialization.
Signal()
// Wait waits for the initialization signal.
Wait()
}
type initializationSignal struct {
once sync.Once
done chan struct{}
}
func NewInitializationSignal() InitializationSignal {
return &initializationSignal{
once: sync.Once{},
done: make(chan struct{}),
}
}
func (i *initializationSignal) Signal() {
i.once.Do(func() { close(i.done) })
}
func (i *initializationSignal) Wait() {
<-i.done
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,265 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"fmt"
"io"
"net/http"
"strconv"
"strings"
"text/tabwriter"
"time"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apiserver/pkg/server/mux"
)
const (
queryIncludeRequestDetails = "includeRequestDetails"
)
func (cfgCtlr *configController) Install(c *mux.PathRecorderMux) {
// TODO(yue9944882): handle "Accept" header properly
// debugging dumps a CSV content for three levels of granularity
// 1. row per priority-level
c.UnlistedHandleFunc("/debug/api_priority_and_fairness/dump_priority_levels", cfgCtlr.dumpPriorityLevels)
// 2. row per queue
c.UnlistedHandleFunc("/debug/api_priority_and_fairness/dump_queues", cfgCtlr.dumpQueues)
// 3. row per request
c.UnlistedHandleFunc("/debug/api_priority_and_fairness/dump_requests", cfgCtlr.dumpRequests)
}
func (cfgCtlr *configController) dumpPriorityLevels(w http.ResponseWriter, r *http.Request) {
cfgCtlr.lock.Lock()
defer cfgCtlr.lock.Unlock()
tabWriter := tabwriter.NewWriter(w, 8, 0, 1, ' ', 0)
columnHeaders := []string{
"PriorityLevelName", // 1
"ActiveQueues", // 2
"IsIdle", // 3
"IsQuiescing", // 4
"WaitingRequests", // 5
"ExecutingRequests", // 6
}
tabPrint(tabWriter, rowForHeaders(columnHeaders))
endLine(tabWriter)
for _, plState := range cfgCtlr.priorityLevelStates {
if plState.queues == nil {
tabPrint(tabWriter, row(
plState.pl.Name, // 1
"<none>", // 2
"<none>", // 3
"<none>", // 4
"<none>", // 5
"<none>", // 6
))
endLine(tabWriter)
continue
}
queueSetDigest := plState.queues.Dump(false)
activeQueueNum := 0
for _, q := range queueSetDigest.Queues {
if len(q.Requests) > 0 {
activeQueueNum++
}
}
tabPrint(tabWriter, rowForPriorityLevel(
plState.pl.Name, // 1
activeQueueNum, // 2
plState.queues.IsIdle(), // 3
plState.quiescing, // 4
queueSetDigest.Waiting, // 5
queueSetDigest.Executing, // 6
))
endLine(tabWriter)
}
runtime.HandleError(tabWriter.Flush())
}
func (cfgCtlr *configController) dumpQueues(w http.ResponseWriter, r *http.Request) {
cfgCtlr.lock.Lock()
defer cfgCtlr.lock.Unlock()
tabWriter := tabwriter.NewWriter(w, 8, 0, 1, ' ', 0)
columnHeaders := []string{
"PriorityLevelName", // 1
"Index", // 2
"PendingRequests", // 3
"ExecutingRequests", // 4
"SeatsInUse", // 5
"NextDispatchR", // 6
"InitialSeatsSum", // 7
"MaxSeatsSum", // 8
"TotalWorkSum", // 9
}
tabPrint(tabWriter, rowForHeaders(columnHeaders))
endLine(tabWriter)
for _, plState := range cfgCtlr.priorityLevelStates {
if plState.queues == nil {
tabPrint(tabWriter, row(
plState.pl.Name, // 1
"<none>", // 2
"<none>", // 3
"<none>", // 4
"<none>", // 5
"<none>", // 6
"<none>", // 7
"<none>", // 8
"<none>", // 9
))
endLine(tabWriter)
continue
}
queueSetDigest := plState.queues.Dump(false)
for i, q := range queueSetDigest.Queues {
tabPrint(tabWriter, row(
plState.pl.Name, // 1 - "PriorityLevelName"
strconv.Itoa(i), // 2 - "Index"
strconv.Itoa(len(q.Requests)), // 3 - "PendingRequests"
strconv.Itoa(q.ExecutingRequests), // 4 - "ExecutingRequests"
strconv.Itoa(q.SeatsInUse), // 5 - "SeatsInUse"
q.NextDispatchR, // 6 - "NextDispatchR"
strconv.Itoa(q.QueueSum.InitialSeatsSum), // 7 - "InitialSeatsSum"
strconv.Itoa(q.QueueSum.MaxSeatsSum), // 8 - "MaxSeatsSum"
q.QueueSum.TotalWorkSum, // 9 - "TotalWorkSum"
))
endLine(tabWriter)
}
}
runtime.HandleError(tabWriter.Flush())
}
func (cfgCtlr *configController) dumpRequests(w http.ResponseWriter, r *http.Request) {
cfgCtlr.lock.Lock()
defer cfgCtlr.lock.Unlock()
includeRequestDetails := len(r.URL.Query().Get(queryIncludeRequestDetails)) > 0
tabWriter := tabwriter.NewWriter(w, 8, 0, 1, ' ', 0)
tabPrint(tabWriter, rowForHeaders([]string{
"PriorityLevelName", // 1
"FlowSchemaName", // 2
"QueueIndex", // 3
"RequestIndexInQueue", // 4
"FlowDistingsher", // 5
"ArriveTime", // 6
"InitialSeats", // 7
"FinalSeats", // 8
"AdditionalLatency", // 9
}))
if includeRequestDetails {
continueLine(tabWriter)
tabPrint(tabWriter, rowForHeaders([]string{
"UserName", // 10
"Verb", // 11
"APIPath", // 12
"Namespace", // 13
"Name", // 14
"APIVersion", // 15
"Resource", // 16
"SubResource", // 17
}))
}
endLine(tabWriter)
for _, plState := range cfgCtlr.priorityLevelStates {
if plState.queues == nil {
continue
}
queueSetDigest := plState.queues.Dump(includeRequestDetails)
for iq, q := range queueSetDigest.Queues {
for ir, r := range q.Requests {
tabPrint(tabWriter, row(
plState.pl.Name, // 1
r.MatchedFlowSchema, // 2
strconv.Itoa(iq), // 3
strconv.Itoa(ir), // 4
r.FlowDistinguisher, // 5
r.ArriveTime.UTC().Format(time.RFC3339Nano), // 6
strconv.Itoa(int(r.WorkEstimate.InitialSeats)), // 7
strconv.Itoa(int(r.WorkEstimate.FinalSeats)), // 8
r.WorkEstimate.AdditionalLatency.String(), // 9
))
if includeRequestDetails {
continueLine(tabWriter)
tabPrint(tabWriter, rowForRequestDetails(
r.UserName, // 10
r.RequestInfo.Verb, // 11
r.RequestInfo.Path, // 12
r.RequestInfo.Namespace, // 13
r.RequestInfo.Name, // 14
schema.GroupVersion{
Group: r.RequestInfo.APIGroup,
Version: r.RequestInfo.APIVersion,
}.String(), // 15
r.RequestInfo.Resource, // 16
r.RequestInfo.Subresource, // 17
))
}
endLine(tabWriter)
}
}
}
runtime.HandleError(tabWriter.Flush())
}
func tabPrint(w io.Writer, row string) {
_, err := fmt.Fprint(w, row)
runtime.HandleError(err)
}
func continueLine(w io.Writer) {
_, err := fmt.Fprint(w, ",\t")
runtime.HandleError(err)
}
func endLine(w io.Writer) {
_, err := fmt.Fprint(w, "\n")
runtime.HandleError(err)
}
func rowForHeaders(headers []string) string {
return row(headers...)
}
func rowForPriorityLevel(plName string, activeQueues int, isIdle, isQuiescing bool, waitingRequests, executingRequests int) string {
return row(
plName,
strconv.Itoa(activeQueues),
strconv.FormatBool(isIdle),
strconv.FormatBool(isQuiescing),
strconv.Itoa(waitingRequests),
strconv.Itoa(executingRequests),
)
}
func rowForRequestDetails(username, verb, path, namespace, name, apiVersion, resource, subResource string) string {
return row(
username,
verb,
path,
namespace,
name,
apiVersion,
resource,
subResource,
)
}
func row(columns ...string) string {
return strings.Join(columns, ",\t")
}

View File

@ -0,0 +1,197 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"context"
"strconv"
"time"
"k8s.io/apiserver/pkg/server/httplog"
"k8s.io/apiserver/pkg/server/mux"
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/eventclock"
fqs "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/queueset"
"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
fcrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
flowcontrol "k8s.io/api/flowcontrol/v1beta3"
flowcontrolclient "k8s.io/client-go/kubernetes/typed/flowcontrol/v1beta3"
)
// ConfigConsumerAsFieldManager is how the config consuminng
// controller appears in an ObjectMeta ManagedFieldsEntry.Manager
const ConfigConsumerAsFieldManager = "api-priority-and-fairness-config-consumer-v1"
// Interface defines how the API Priority and Fairness filter interacts with the underlying system.
type Interface interface {
// Handle takes care of queuing and dispatching a request
// characterized by the given digest. The given `noteFn` will be
// invoked with the results of request classification.
// The given `workEstimator` is called, if at all, after noteFn.
// `workEstimator` will be invoked only when the request
// is classified as non 'exempt'.
// 'workEstimator', when invoked, must return the
// work parameters for the request.
// If the request is queued then `queueNoteFn` will be called twice,
// first with `true` and then with `false`; otherwise
// `queueNoteFn` will not be called at all. If Handle decides
// that the request should be executed then `execute()` will be
// invoked once to execute the request; otherwise `execute()` will
// not be invoked.
// Handle() should never return while execute() is running, even if
// ctx is cancelled or times out.
Handle(ctx context.Context,
requestDigest RequestDigest,
noteFn func(fs *flowcontrol.FlowSchema, pl *flowcontrol.PriorityLevelConfiguration, flowDistinguisher string),
workEstimator func() fcrequest.WorkEstimate,
queueNoteFn fq.QueueNoteFn,
execFn func(),
)
// Run monitors config objects from the main apiservers and causes
// any needed changes to local behavior. This method ceases
// activity and returns after the given channel is closed.
Run(stopCh <-chan struct{}) error
// Install installs debugging endpoints to the web-server.
Install(c *mux.PathRecorderMux)
// WatchTracker provides the WatchTracker interface.
WatchTracker
}
// This request filter implements https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/1040-priority-and-fairness/README.md
// New creates a new instance to implement API priority and fairness
func New(
informerFactory kubeinformers.SharedInformerFactory,
flowcontrolClient flowcontrolclient.FlowcontrolV1beta3Interface,
serverConcurrencyLimit int,
requestWaitLimit time.Duration,
) Interface {
clk := eventclock.Real{}
return NewTestable(TestableConfig{
Name: "Controller",
Clock: clk,
AsFieldManager: ConfigConsumerAsFieldManager,
FoundToDangling: func(found bool) bool { return !found },
InformerFactory: informerFactory,
FlowcontrolClient: flowcontrolClient,
ServerConcurrencyLimit: serverConcurrencyLimit,
RequestWaitLimit: requestWaitLimit,
ReqsGaugeVec: metrics.PriorityLevelConcurrencyGaugeVec,
ExecSeatsGaugeVec: metrics.PriorityLevelExecutionSeatsGaugeVec,
QueueSetFactory: fqs.NewQueueSetFactory(clk),
})
}
// TestableConfig carries the parameters to an implementation that is testable
type TestableConfig struct {
// Name of the controller
Name string
// Clock to use in timing deliberate delays
Clock clock.PassiveClock
// AsFieldManager is the string to use in the metadata for
// server-side apply. Normally this is
// `ConfigConsumerAsFieldManager`. This is exposed as a parameter
// so that a test of competing controllers can supply different
// values.
AsFieldManager string
// FoundToDangling maps the boolean indicating whether a
// FlowSchema's referenced PLC exists to the boolean indicating
// that FlowSchema's status should indicate a dangling reference.
// This is a parameter so that we can write tests of what happens
// when servers disagree on that bit of Status.
FoundToDangling func(bool) bool
// InformerFactory to use in building the controller
InformerFactory kubeinformers.SharedInformerFactory
// FlowcontrolClient to use for manipulating config objects
FlowcontrolClient flowcontrolclient.FlowcontrolV1beta3Interface
// ServerConcurrencyLimit for the controller to enforce
ServerConcurrencyLimit int
// RequestWaitLimit configured on the server
RequestWaitLimit time.Duration
// GaugeVec for metrics about requests, broken down by phase and priority_level
ReqsGaugeVec metrics.RatioedGaugeVec
// RatioedGaugePairVec for metrics about seats occupied by all phases of execution
ExecSeatsGaugeVec metrics.RatioedGaugeVec
// QueueSetFactory for the queuing implementation
QueueSetFactory fq.QueueSetFactory
}
// NewTestable is extra flexible to facilitate testing
func NewTestable(config TestableConfig) Interface {
return newTestableController(config)
}
func (cfgCtlr *configController) Handle(ctx context.Context, requestDigest RequestDigest,
noteFn func(fs *flowcontrol.FlowSchema, pl *flowcontrol.PriorityLevelConfiguration, flowDistinguisher string),
workEstimator func() fcrequest.WorkEstimate,
queueNoteFn fq.QueueNoteFn,
execFn func()) {
fs, pl, isExempt, req, startWaitingTime := cfgCtlr.startRequest(ctx, requestDigest, noteFn, workEstimator, queueNoteFn)
queued := startWaitingTime != time.Time{}
if req == nil {
if queued {
metrics.ObserveWaitingDuration(ctx, pl.Name, fs.Name, strconv.FormatBool(req != nil), time.Since(startWaitingTime))
}
klog.V(7).Infof("Handle(%#+v) => fsName=%q, distMethod=%#+v, plName=%q, isExempt=%v, reject", requestDigest, fs.Name, fs.Spec.DistinguisherMethod, pl.Name, isExempt)
return
}
klog.V(7).Infof("Handle(%#+v) => fsName=%q, distMethod=%#+v, plName=%q, isExempt=%v, queued=%v", requestDigest, fs.Name, fs.Spec.DistinguisherMethod, pl.Name, isExempt, queued)
var executed bool
idle, panicking := true, true
defer func() {
klog.V(7).Infof("Handle(%#+v) => fsName=%q, distMethod=%#+v, plName=%q, isExempt=%v, queued=%v, Finish() => panicking=%v idle=%v",
requestDigest, fs.Name, fs.Spec.DistinguisherMethod, pl.Name, isExempt, queued, panicking, idle)
if idle {
cfgCtlr.maybeReap(pl.Name)
}
}()
idle = req.Finish(func() {
if queued {
metrics.ObserveWaitingDuration(ctx, pl.Name, fs.Name, strconv.FormatBool(req != nil), time.Since(startWaitingTime))
}
metrics.AddDispatch(ctx, pl.Name, fs.Name)
executed = true
startExecutionTime := time.Now()
defer func() {
executionTime := time.Since(startExecutionTime)
httplog.AddKeyValue(ctx, "apf_execution_time", executionTime)
metrics.ObserveExecutionDuration(ctx, pl.Name, fs.Name, executionTime)
}()
execFn()
})
if queued && !executed {
metrics.ObserveWaitingDuration(ctx, pl.Name, fs.Name, strconv.FormatBool(req != nil), time.Since(startWaitingTime))
}
panicking = false
}

View File

@ -0,0 +1,256 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"errors"
"fmt"
"math"
"sort"
)
// allocProblemItem is one of the classes to which computeConcurrencyAllocation should make an allocation
type allocProblemItem struct {
target float64
lowerBound float64
upperBound float64
}
// relativeAllocItem is like allocProblemItem but with target avoiding zero and the bounds divided by the target
type relativeAllocItem struct {
target float64
relativeLowerBound float64
relativeUpperBound float64
}
// relativeAllocProblem collects together all the classes and holds the result of sorting by increasing bounds.
// For J <= K, ascendingIndices[J] identifies a bound that is <= the one of ascendingIndices[K].
// When ascendingIndices[J] = 2*N + 0, this identifies the lower bound of items[N].
// When ascendingIndices[J] = 2*N + 1, this identifies the upper bound of items[N].
type relativeAllocProblem struct {
items []relativeAllocItem
ascendingIndices []int
}
// initIndices fills in ascendingIndices and sorts them
func (rap *relativeAllocProblem) initIndices() *relativeAllocProblem {
rap.ascendingIndices = make([]int, len(rap.items)*2)
for idx := 0; idx < len(rap.ascendingIndices); idx++ {
rap.ascendingIndices[idx] = idx
}
sort.Sort(rap)
return rap
}
func (rap *relativeAllocProblem) getItemIndex(idx int) (int, bool) {
packedIndex := rap.ascendingIndices[idx]
itemIndex := packedIndex / 2
return itemIndex, packedIndex == itemIndex*2
}
// decode(J) returns the bound associated with ascendingIndices[J], the associated items index,
// and a bool indicating whether the bound is the item's lower bound.
func (rap *relativeAllocProblem) decode(idx int) (float64, int, bool) {
itemIdx, lower := rap.getItemIndex(idx)
if lower {
return rap.items[itemIdx].relativeLowerBound, itemIdx, lower
}
return rap.items[itemIdx].relativeUpperBound, itemIdx, lower
}
func (rap *relativeAllocProblem) getProportion(idx int) float64 {
prop, _, _ := rap.decode(idx)
return prop
}
func (rap *relativeAllocProblem) Len() int { return len(rap.items) * 2 }
func (rap *relativeAllocProblem) Less(i, j int) bool {
return rap.getProportion(i) < rap.getProportion(j)
}
func (rap *relativeAllocProblem) Swap(i, j int) {
rap.ascendingIndices[i], rap.ascendingIndices[j] = rap.ascendingIndices[j], rap.ascendingIndices[i]
}
// minMax records the minimum and maximum value seen while scanning a set of numbers
type minMax struct {
min float64
max float64
}
// note scans one more number
func (mm *minMax) note(x float64) {
mm.min = math.Min(mm.min, x)
mm.max = math.Max(mm.max, x)
}
const MinTarget = 0.001
const epsilon = 0.0000001
// computeConcurrencyAllocation returns the unique `allocs []float64`, and
// an associated `fairProp float64`, that jointly have
// all of the following properties (to the degree that floating point calculations allow)
// if possible otherwise returns an error saying why it is impossible.
// `allocs` sums to `requiredSum`.
// For each J in [0, len(classes)):
// (1) `classes[J].lowerBound <= allocs[J] <= classes[J].upperBound` and
// (2) exactly one of the following is true:
// (2a) `allocs[J] == fairProp * classes[J].target`,
// (2b) `allocs[J] == classes[J].lowerBound && classes[J].lowerBound > fairProp * classes[J].target`, or
// (2c) `allocs[J] == classes[J].upperBound && classes[J].upperBound < fairProp * classes[J].target`.
// Each allocProblemItem is required to have `target >= lowerBound >= 0` and `upperBound >= lowerBound`.
// A target smaller than MinTarget is treated as if it were MinTarget.
func computeConcurrencyAllocation(requiredSum int, classes []allocProblemItem) ([]float64, float64, error) {
if requiredSum < 0 {
return nil, 0, errors.New("negative sums are not supported")
}
requiredSumF := float64(requiredSum)
var lowSum, highSum, targetSum float64
ubRange := minMax{min: float64(math.MaxFloat32)}
lbRange := minMax{min: float64(math.MaxFloat32)}
relativeItems := make([]relativeAllocItem, len(classes))
for idx, item := range classes {
target := item.target
if item.lowerBound < 0 {
return nil, 0, fmt.Errorf("lower bound %d is %v but negative lower bounds are not allowed", idx, item.lowerBound)
}
if target < item.lowerBound {
return nil, 0, fmt.Errorf("target %d is %v, which is below its lower bound of %v", idx, target, item.lowerBound)
}
if item.upperBound < item.lowerBound {
return nil, 0, fmt.Errorf("upper bound %d is %v but should not be less than the lower bound %v", idx, item.upperBound, item.lowerBound)
}
if target < MinTarget {
// tweak this to a non-zero value so avoid dividing by zero
target = MinTarget
}
lowSum += item.lowerBound
highSum += item.upperBound
targetSum += target
relativeItem := relativeAllocItem{
target: target,
relativeLowerBound: item.lowerBound / target,
relativeUpperBound: item.upperBound / target,
}
ubRange.note(relativeItem.relativeUpperBound)
lbRange.note(relativeItem.relativeLowerBound)
relativeItems[idx] = relativeItem
}
if lbRange.max > 1 {
return nil, 0, fmt.Errorf("lbRange.max-1=%v, which is impossible because lbRange.max can not be greater than 1", lbRange.max-1)
}
if lowSum-requiredSumF > epsilon {
return nil, 0, fmt.Errorf("lower bounds sum to %v, which is higher than the required sum of %v", lowSum, requiredSum)
}
if requiredSumF-highSum > epsilon {
return nil, 0, fmt.Errorf("upper bounds sum to %v, which is lower than the required sum of %v", highSum, requiredSum)
}
ans := make([]float64, len(classes))
if requiredSum == 0 {
return ans, 0, nil
}
if lowSum-requiredSumF > -epsilon { // no wiggle room, constrained from below
for idx, item := range classes {
ans[idx] = item.lowerBound
}
return ans, lbRange.min, nil
}
if requiredSumF-highSum > -epsilon { // no wiggle room, constrained from above
for idx, item := range classes {
ans[idx] = item.upperBound
}
return ans, ubRange.max, nil
}
// Now we know the solution is a unique fairProp in [lbRange.min, ubRange.max].
// See if the solution does not run into any bounds.
fairProp := requiredSumF / targetSum
if lbRange.max <= fairProp && fairProp <= ubRange.min { // no bounds matter
for idx := range classes {
ans[idx] = relativeItems[idx].target * fairProp
}
return ans, fairProp, nil
}
// Sadly, some bounds matter.
// We find the solution by sorting the bounds and considering progressively
// higher values of fairProp, starting from lbRange.min.
rap := (&relativeAllocProblem{items: relativeItems}).initIndices()
sumSoFar := lowSum
fairProp = lbRange.min
var sensitiveTargetSum, deltaSensitiveTargetSum float64
var numSensitiveClasses, deltaSensitiveClasses int
var nextIdx int
// `nextIdx` is the next `rap` index to consider.
// `sumSoFar` is what the allocs would sum to if the current
// value of `fairProp` solves the problem.
// If the current value of fairProp were the answer then
// `sumSoFar == requiredSum`.
// Otherwise the next increment in fairProp involves changing the allocations
// of `numSensitiveClasses` classes whose targets sum to `sensitiveTargetSum`;
// for the other classes, an upper or lower bound has applied and will continue to apply.
// The last increment of nextIdx calls for adding `deltaSensitiveClasses`
// to `numSensitiveClasses` and adding `deltaSensitiveTargetSum` to `sensitiveTargetSum`.
for sumSoFar < requiredSumF {
// There might be more than one bound that is equal to the current value
// of fairProp; find all of them because they will all be relevant to
// the next change in fairProp.
// Set nextBound to the next bound that is NOT equal to fairProp,
// and advance nextIdx to the index of that bound.
var nextBound float64
for {
sensitiveTargetSum += deltaSensitiveTargetSum
numSensitiveClasses += deltaSensitiveClasses
if nextIdx >= rap.Len() {
return nil, 0, fmt.Errorf("impossible: ran out of bounds to consider in bound-constrained problem")
}
var itemIdx int
var lower bool
nextBound, itemIdx, lower = rap.decode(nextIdx)
if lower {
deltaSensitiveClasses = 1
deltaSensitiveTargetSum = rap.items[itemIdx].target
} else {
deltaSensitiveClasses = -1
deltaSensitiveTargetSum = -rap.items[itemIdx].target
}
nextIdx++
if nextBound > fairProp {
break
}
}
// fairProp can increase to nextBound without passing any intermediate bounds.
if numSensitiveClasses == 0 {
// No classes are affected by the next range of fairProp; skip right past it
fairProp = nextBound
continue
}
// See whether fairProp can increase to the solution before passing the next bound.
deltaFairProp := (requiredSumF - sumSoFar) / sensitiveTargetSum
nextProp := fairProp + deltaFairProp
if nextProp <= nextBound {
fairProp = nextProp
break
}
// No, fairProp has to increase above nextBound
sumSoFar += (nextBound - fairProp) * sensitiveTargetSum
fairProp = nextBound
}
for idx, item := range classes {
ans[idx] = math.Max(item.lowerBound, math.Min(item.upperBound, fairProp*relativeItems[idx].target))
}
return ans, fairProp, nil
}

View File

@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package debug
import (
"time"
"k8s.io/apiserver/pkg/endpoints/request"
flowcontrolrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
)
// QueueSetDump is an instant dump of queue-set.
type QueueSetDump struct {
Queues []QueueDump
Waiting int
Executing int
SeatsInUse int
SeatsWaiting int
}
// QueueDump is an instant dump of one queue in a queue-set.
type QueueDump struct {
QueueSum QueueSum
Requests []RequestDump
NextDispatchR string
ExecutingRequests int
SeatsInUse int
}
type QueueSum struct {
InitialSeatsSum int
MaxSeatsSum int
TotalWorkSum string
}
// RequestDump is an instant dump of one requests pending in the queue.
type RequestDump struct {
MatchedFlowSchema string
FlowDistinguisher string
ArriveTime time.Time
StartTime time.Time
WorkEstimate flowcontrolrequest.WorkEstimate
// request details
UserName string
RequestInfo request.RequestInfo
}

View File

@ -0,0 +1,47 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eventclock
import (
"time"
baseclock "k8s.io/utils/clock"
)
// EventFunc does some work that needs to be done at or after the
// given time.
type EventFunc func(time.Time)
// EventClock is an active clock abstraction for use in code that is
// testable with a fake clock that itself determines how time may be
// advanced. The timing paradigm is invoking EventFuncs rather than
// synchronizing through channels, so that the fake clock has a handle
// on when associated activity is done.
type Interface interface {
baseclock.PassiveClock
// Sleep returns after the given duration (or more).
Sleep(d time.Duration)
// EventAfterDuration invokes the given EventFunc after the given duration (or more),
// passing the time when the invocation was launched.
EventAfterDuration(f EventFunc, d time.Duration)
// EventAfterTime invokes the given EventFunc at the given time or later,
// passing the time when the invocation was launched.
EventAfterTime(f EventFunc, t time.Time)
}

View File

@ -0,0 +1,44 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eventclock
import (
"time"
"k8s.io/utils/clock"
)
// RealEventClock fires event on real world time
type Real struct {
clock.RealClock
}
var _ Interface = Real{}
// EventAfterDuration schedules an EventFunc
func (Real) EventAfterDuration(f EventFunc, d time.Duration) {
ch := time.After(d)
go func() {
t := <-ch
f(t)
}()
}
// EventAfterTime schedules an EventFunc
func (r Real) EventAfterTime(f EventFunc, t time.Time) {
r.EventAfterDuration(f, time.Until(t))
}

View File

@ -0,0 +1,191 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fairqueuing
import (
"math"
"sync"
"time"
fcmetrics "k8s.io/apiserver/pkg/util/flowcontrol/metrics"
"k8s.io/utils/clock"
)
// Integrator computes the moments of some variable X over time as
// read from a particular clock. The integrals start when the
// Integrator is created, and ends at the latest operation on the
// Integrator.
type Integrator interface {
fcmetrics.Gauge
GetResults() IntegratorResults
// Return the results of integrating to now, and reset integration to start now
Reset() IntegratorResults
}
// IntegratorResults holds statistical abstracts of the integration
type IntegratorResults struct {
Duration float64 //seconds
Average float64 //time-weighted
Deviation float64 //standard deviation: sqrt(avg((value-avg)^2))
Min, Max float64
}
// Equal tests for semantic equality.
// This considers all NaN values to be equal to each other.
func (x *IntegratorResults) Equal(y *IntegratorResults) bool {
return x == y || x != nil && y != nil && x.Duration == y.Duration && x.Min == y.Min && x.Max == y.Max && (x.Average == y.Average || math.IsNaN(x.Average) && math.IsNaN(y.Average)) && (x.Deviation == y.Deviation || math.IsNaN(x.Deviation) && math.IsNaN(y.Deviation))
}
type integrator struct {
name string
clock clock.PassiveClock
sync.Mutex
lastTime time.Time
x float64
moments Moments
min, max float64
}
// NewNamedIntegrator makes one that uses the given clock and name
func NewNamedIntegrator(clock clock.PassiveClock, name string) Integrator {
return &integrator{
name: name,
clock: clock,
lastTime: clock.Now(),
}
}
func (igr *integrator) Set(x float64) {
igr.Lock()
igr.setLocked(x)
igr.Unlock()
}
func (igr *integrator) Add(deltaX float64) {
igr.Lock()
igr.setLocked(igr.x + deltaX)
igr.Unlock()
}
func (igr *integrator) Inc() {
igr.Add(1)
}
func (igr *integrator) Dec() {
igr.Add(-1)
}
func (igr *integrator) SetToCurrentTime() {
igr.Set(float64(time.Now().UnixNano()))
}
func (igr *integrator) setLocked(x float64) {
igr.updateLocked()
igr.x = x
if x < igr.min {
igr.min = x
}
if x > igr.max {
igr.max = x
}
}
func (igr *integrator) updateLocked() {
now := igr.clock.Now()
dt := now.Sub(igr.lastTime).Seconds()
igr.lastTime = now
igr.moments = igr.moments.Add(ConstantMoments(dt, igr.x))
}
func (igr *integrator) GetResults() IntegratorResults {
igr.Lock()
defer igr.Unlock()
return igr.getResultsLocked()
}
func (igr *integrator) Reset() IntegratorResults {
igr.Lock()
defer igr.Unlock()
results := igr.getResultsLocked()
igr.moments = Moments{}
igr.min = igr.x
igr.max = igr.x
return results
}
func (igr *integrator) getResultsLocked() (results IntegratorResults) {
igr.updateLocked()
results.Min, results.Max = igr.min, igr.max
results.Duration = igr.moments.ElapsedSeconds
results.Average, results.Deviation = igr.moments.AvgAndStdDev()
return
}
// Moments are the integrals of the 0, 1, and 2 powers of some
// variable X over some range of time.
type Moments struct {
ElapsedSeconds float64 // integral of dt
IntegralX float64 // integral of x dt
IntegralXX float64 // integral of x*x dt
}
// ConstantMoments is for a constant X
func ConstantMoments(dt, x float64) Moments {
return Moments{
ElapsedSeconds: dt,
IntegralX: x * dt,
IntegralXX: x * x * dt,
}
}
// Add combines over two ranges of time
func (igr Moments) Add(ogr Moments) Moments {
return Moments{
ElapsedSeconds: igr.ElapsedSeconds + ogr.ElapsedSeconds,
IntegralX: igr.IntegralX + ogr.IntegralX,
IntegralXX: igr.IntegralXX + ogr.IntegralXX,
}
}
// Sub finds the difference between a range of time and a subrange
func (igr Moments) Sub(ogr Moments) Moments {
return Moments{
ElapsedSeconds: igr.ElapsedSeconds - ogr.ElapsedSeconds,
IntegralX: igr.IntegralX - ogr.IntegralX,
IntegralXX: igr.IntegralXX - ogr.IntegralXX,
}
}
// AvgAndStdDev returns the average and standard devation
func (igr Moments) AvgAndStdDev() (float64, float64) {
if igr.ElapsedSeconds <= 0 {
return math.NaN(), math.NaN()
}
avg := igr.IntegralX / igr.ElapsedSeconds
// standard deviation is sqrt( average( (x - xbar)^2 ) )
// = sqrt( Integral( x^2 + xbar^2 -2*x*xbar dt ) / Duration )
// = sqrt( ( Integral( x^2 dt ) + Duration * xbar^2 - 2*xbar*Integral(x dt) ) / Duration)
// = sqrt( Integral(x^2 dt)/Duration - xbar^2 )
variance := igr.IntegralXX/igr.ElapsedSeconds - avg*avg
if variance >= 0 {
return avg, math.Sqrt(variance)
}
return avg, math.NaN()
}

View File

@ -0,0 +1,136 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fairqueuing
import (
"context"
"time"
"k8s.io/apiserver/pkg/util/flowcontrol/debug"
"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
"k8s.io/apiserver/pkg/util/flowcontrol/request"
)
// QueueSetFactory is used to create QueueSet objects. Creation, like
// config update, is done in two phases: the first phase consumes the
// QueuingConfig and the second consumes the DispatchingConfig. They
// are separated so that errors from the first phase can be found
// before committing to a concurrency allotment for the second.
type QueueSetFactory interface {
// BeginConstruction does the first phase of creating a QueueSet.
// The RatioedGaugePair observes number of requests,
// execution covering just the regular phase.
// The RatioedGauge observes number of seats occupied through all phases of execution.
// The Gauge observes the seat demand (executing + queued seats).
BeginConstruction(QueuingConfig, metrics.RatioedGaugePair, metrics.RatioedGauge, metrics.Gauge) (QueueSetCompleter, error)
}
// QueueSetCompleter finishes the two-step process of creating or
// reconfiguring a QueueSet
type QueueSetCompleter interface {
// Complete returns a QueueSet configured by the given
// dispatching configuration.
Complete(DispatchingConfig) QueueSet
}
// QueueSet is the abstraction for the queuing and dispatching
// functionality of one non-exempt priority level. It covers the
// functionality described in the "Assignment to a Queue", "Queuing",
// and "Dispatching" sections of
// https://github.com/kubernetes/enhancements/blob/master/keps/sig-api-machinery/1040-priority-and-fairness/README.md
// . Some day we may have connections between priority levels, but
// today is not that day.
type QueueSet interface {
// BeginConfigChange starts the two-step process of updating the
// configuration. No change is made until Complete is called. If
// `C := X.BeginConstruction(q)` then `C.Complete(d)` returns the
// same value `X`. If the QueuingConfig's DesiredNumQueues field
// is zero then the other queuing-specific config parameters are
// not changed, so that the queues continue draining as before.
// In any case, reconfiguration does not discard any queue unless
// and until it is undesired and empty.
BeginConfigChange(QueuingConfig) (QueueSetCompleter, error)
// IsIdle returns a bool indicating whether the QueueSet was idle
// at the moment of the return. Idle means the QueueSet has zero
// requests queued and zero executing. This bit can change only
// (1) during a call to StartRequest and (2) during a call to
// Request::Finish. In the latter case idleness can only change
// from false to true.
IsIdle() bool
// StartRequest begins the process of handling a request. If the
// request gets queued and the number of queues is greater than 1
// then StartRequest uses the given hashValue as the source of
// entropy as it shuffle-shards the request into a queue. The
// descr1 and descr2 values play no role in the logic but appear
// in log messages. This method always returns quickly (without
// waiting for the request to be dequeued). If this method
// returns a nil Request value then caller should reject the
// request and the returned bool indicates whether the QueueSet
// was idle at the moment of the return. Otherwise idle==false
// and the client must call the Finish method of the Request
// exactly once.
StartRequest(ctx context.Context, width *request.WorkEstimate, hashValue uint64, flowDistinguisher, fsName string, descr1, descr2 interface{}, queueNoteFn QueueNoteFn) (req Request, idle bool)
// Dump saves and returns the instant internal state of the queue-set.
// Note that dumping process will stop the queue-set from proceeding
// any requests.
// For debugging only.
Dump(includeRequestDetails bool) debug.QueueSetDump
}
// QueueNoteFn is called when a request enters and leaves a queue
type QueueNoteFn func(inQueue bool)
// Request represents the remainder of the handling of one request
type Request interface {
// Finish determines whether to execute or reject the request and
// invokes `execute` if the decision is to execute the request.
// The returned `idle bool` value indicates whether the QueueSet
// was idle when the value was calculated, but might no longer be
// accurate by the time the client examines that value.
Finish(execute func()) (idle bool)
}
// QueuingConfig defines the configuration of the queuing aspect of a QueueSet.
type QueuingConfig struct {
// Name is used to identify a queue set, allowing for descriptive information about its intended use
Name string
// DesiredNumQueues is the number of queues that the API says
// should exist now. This may be zero, in which case
// QueueLengthLimit, HandSize, and RequestWaitLimit are ignored.
DesiredNumQueues int
// QueueLengthLimit is the maximum number of requests that may be waiting in a given queue at a time
QueueLengthLimit int
// HandSize is a parameter of shuffle sharding. Upon arrival of a request, a queue is chosen by randomly
// dealing a "hand" of this many queues and then picking one of minimum length.
HandSize int
// RequestWaitLimit is the maximum amount of time that a request may wait in a queue.
// If, by the end of that time, the request has not been dispatched then it is rejected.
RequestWaitLimit time.Duration
}
// DispatchingConfig defines the configuration of the dispatching aspect of a QueueSet.
type DispatchingConfig struct {
// ConcurrencyLimit is the maximum number of requests of this QueueSet that may be executing at a time
ConcurrencyLimit int
}

View File

@ -0,0 +1,34 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package promise
// WriteOnce represents a variable that is initially not set and can
// be set once and is readable. This is the common meaning for
// "promise".
type WriteOnce interface {
// Get reads the current value of this variable. If this
// variable is not set yet then this call blocks until this
// variable gets a value.
Get() interface{}
// Set normally writes a value into this variable, unblocks every
// goroutine waiting for this variable to have a value, and
// returns true. In the unhappy case that this variable is
// already set, this method returns false without modifying the
// variable's value.
Set(interface{}) bool
}

View File

@ -0,0 +1,70 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package promise
import (
"sync"
)
// promise implements the WriteOnce interface.
type promise struct {
doneCh <-chan struct{}
doneVal interface{}
setCh chan struct{}
onceler sync.Once
value interface{}
}
var _ WriteOnce = &promise{}
// NewWriteOnce makes a new thread-safe WriteOnce.
//
// If `initial` is non-nil then that value is Set at creation time.
//
// If a `Get` is waiting soon after `doneCh` becomes selectable (which
// never happens for the nil channel) then `Set(doneVal)` effectively
// happens at that time.
func NewWriteOnce(initial interface{}, doneCh <-chan struct{}, doneVal interface{}) WriteOnce {
p := &promise{
doneCh: doneCh,
doneVal: doneVal,
setCh: make(chan struct{}),
}
if initial != nil {
p.Set(initial)
}
return p
}
func (p *promise) Get() interface{} {
select {
case <-p.setCh:
case <-p.doneCh:
p.Set(p.doneVal)
}
return p.value
}
func (p *promise) Set(value interface{}) bool {
var ans bool
p.onceler.Do(func() {
p.value = value
close(p.setCh)
ans = true
})
return ans
}

View File

@ -0,0 +1,119 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package queueset implements a technique called "fair queuing for
// server requests". One QueueSet is a set of queues operating
// according to this technique.
//
// Fair queuing for server requests is inspired by the fair queuing
// technique from the world of networking. You can find a good paper
// on that at https://dl.acm.org/citation.cfm?doid=75247.75248 or
// http://people.csail.mit.edu/imcgraw/links/research/pubs/networks/WFQ.pdf
// and there is an implementation outline in the Wikipedia article at
// https://en.wikipedia.org/wiki/Fair_queuing .
//
// Fair queuing for server requests differs from traditional fair
// queuing in three ways: (1) we are dispatching application layer
// requests to a server rather than transmitting packets on a network
// link, (2) multiple requests can be executing at once, and (3) the
// service time (execution duration) is not known until the execution
// completes.
//
// The first two differences can easily be handled by straightforward
// adaptation of the concept called "R(t)" in the original paper and
// "virtual time" in the implementation outline. In that
// implementation outline, the notation now() is used to mean reading
// the virtual clock. In the original papers terms, "R(t)" is the
// number of "rounds" that have been completed at real time t ---
// where a round consists of virtually transmitting one bit from every
// non-empty queue in the router (regardless of which queue holds the
// packet that is really being transmitted at the moment); in this
// conception, a packet is considered to be "in" its queue until the
// packets transmission is finished. For our problem, we can define a
// round to be giving one nanosecond of CPU to every non-empty queue
// in the apiserver (where emptiness is judged based on both queued
// and executing requests from that queue), and define R(t) = (server
// start time) + (1 ns) * (number of rounds since server start). Let
// us write NEQ(t) for that number of non-empty queues in the
// apiserver at time t. Let us also write C for the concurrency
// limit. In the original paper, the partial derivative of R(t) with
// respect to t is
//
// 1 / NEQ(t) .
//
// To generalize from transmitting one packet at a time to executing C
// requests at a time, that derivative becomes
//
// C / NEQ(t) .
//
// However, sometimes there are fewer than C requests available to
// execute. For a given queue "q", let us also write "reqs(q, t)" for
// the number of requests of that queue that are executing at that
// time. The total number of requests executing is sum[over q]
// reqs(q, t) and if that is less than C then virtual time is not
// advancing as fast as it would if all C seats were occupied; in this
// case the numerator of the quotient in that derivative should be
// adjusted proportionally. Putting it all together for fair queing
// for server requests: at a particular time t, the partial derivative
// of R(t) with respect to t is
//
// min( C, sum[over q] reqs(q, t) ) / NEQ(t) .
//
// In terms of the implementation outline, this is the rate at which
// virtual time is advancing at time t (in virtual nanoseconds per
// real nanosecond). Where the networking implementation outline adds
// packet size to a virtual time, in our version this corresponds to
// adding a service time (i.e., duration) to virtual time.
//
// The third difference is handled by modifying the algorithm to
// dispatch based on an initial guess at the requests service time
// (duration) and then make the corresponding adjustments once the
// requests actual service time is known. This is similar, although
// not exactly isomorphic, to the original papers adjustment by
// `$\delta$` for the sake of promptness.
//
// For implementation simplicity (see below), let us use the same
// initial service time guess for every request; call that duration
// G. A good choice might be the service time limit (1
// minute). Different guesses will give slightly different dynamics,
// but any positive number can be used for G without ruining the
// long-term behavior.
//
// As in ordinary fair queuing, there is a bound on divergence from
// the ideal. In plain fair queuing the bound is one packet; in our
// version it is C requests.
//
// To support efficiently making the necessary adjustments once a
// requests actual service time is known, the virtual finish time of
// a request and the last virtual finish time of a queue are not
// represented directly but instead computed from queue length,
// request position in the queue, and an alternate state variable that
// holds the queues virtual start time. While the queue is empty and
// has no requests executing: the value of its virtual start time
// variable is ignored and its last virtual finish time is considered
// to be in the virtual past. When a request arrives to an empty queue
// with no requests executing, the queues virtual start time is set
// to the current virtual time. The virtual finish time of request
// number J in the queue (counting from J=1 for the head) is J * G +
// (queue's virtual start time). While the queue is non-empty: the
// last virtual finish time of the queue is the virtual finish time of
// the last request in the queue. While the queue is empty and has a
// request executing: the last virtual finish time is the queues
// virtual start time. When a request is dequeued for service the
// queues virtual start time is advanced by G. When a request
// finishes being served, and the actual service time was S, the
// queues virtual start time is decremented by G - S.
package queueset

View File

@ -0,0 +1,156 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queueset
import (
"container/list"
)
// removeFromFIFOFunc removes a designated element from the list
// if that element is in the list.
// The complexity of the runtime cost is O(1).
// The returned value is the element removed, if indeed one was removed,
// otherwise `nil`.
type removeFromFIFOFunc func() *request
// walkFunc is called for each request in the list in the
// oldest -> newest order.
// ok: if walkFunc returns false then the iteration stops immediately.
// walkFunc may remove the given request from the fifo,
// but may not mutate the fifo in any othe way.
type walkFunc func(*request) (ok bool)
// Internal interface to abstract out the implementation details
// of the underlying list used to maintain the requests.
//
// Note that a fifo, including the removeFromFIFOFuncs returned from Enqueue,
// is not safe for concurrent use by multiple goroutines.
type fifo interface {
// Enqueue enqueues the specified request into the list and
// returns a removeFromFIFOFunc function that can be used to remove the
// request from the list
Enqueue(*request) removeFromFIFOFunc
// Dequeue pulls out the oldest request from the list.
Dequeue() (*request, bool)
// Peek returns the oldest request without removing it.
Peek() (*request, bool)
// Length returns the number of requests in the list.
Length() int
// QueueSum returns the sum of initial seats, final seats, and
// additional latency aggregated from all requests in this queue.
QueueSum() queueSum
// Walk iterates through the list in order of oldest -> newest
// and executes the specified walkFunc for each request in that order.
//
// if the specified walkFunc returns false the Walk function
// stops the walk an returns immediately.
Walk(walkFunc)
}
// the FIFO list implementation is not safe for concurrent use by multiple
// goroutines.
type requestFIFO struct {
*list.List
sum queueSum
}
func newRequestFIFO() fifo {
return &requestFIFO{
List: list.New(),
}
}
func (l *requestFIFO) Length() int {
return l.Len()
}
func (l *requestFIFO) QueueSum() queueSum {
return l.sum
}
func (l *requestFIFO) Enqueue(req *request) removeFromFIFOFunc {
e := l.PushBack(req)
addToQueueSum(&l.sum, req)
return func() *request {
if e.Value == nil {
return nil
}
l.Remove(e)
e.Value = nil
deductFromQueueSum(&l.sum, req)
return req
}
}
func (l *requestFIFO) Dequeue() (*request, bool) {
return l.getFirst(true)
}
func (l *requestFIFO) Peek() (*request, bool) {
return l.getFirst(false)
}
func (l *requestFIFO) getFirst(remove bool) (*request, bool) {
e := l.Front()
if e == nil {
return nil, false
}
if remove {
defer func() {
l.Remove(e)
e.Value = nil
}()
}
request, ok := e.Value.(*request)
if remove && ok {
deductFromQueueSum(&l.sum, request)
}
return request, ok
}
func (l *requestFIFO) Walk(f walkFunc) {
var next *list.Element
for current := l.Front(); current != nil; current = next {
next = current.Next() // f is allowed to remove current
if r, ok := current.Value.(*request); ok {
if !f(r) {
return
}
}
}
}
func addToQueueSum(sum *queueSum, req *request) {
sum.InitialSeatsSum += req.InitialSeats()
sum.MaxSeatsSum += req.MaxSeats()
sum.TotalWorkSum += req.totalWork()
}
func deductFromQueueSum(sum *queueSum, req *request) {
sum.InitialSeatsSum -= req.InitialSeats()
sum.MaxSeatsSum -= req.MaxSeats()
sum.TotalWorkSum -= req.totalWork()
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,183 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queueset
import (
"context"
"time"
genericrequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/util/flowcontrol/debug"
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/promise"
fcrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
)
// request is a temporary container for "requests" with additional
// tracking fields required for QueueSet functionality.
type request struct {
ctx context.Context
qs *queueSet
flowDistinguisher string
fsName string
// The relevant queue. Is nil if this request did not go through
// a queue.
queue *queue
// estimated amount of work of the request
workEstimate completedWorkEstimate
// decision gets set to a `requestDecision` indicating what to do
// with this request. It gets set exactly once, when the request
// is removed from its queue. The value will be decisionReject,
// decisionCancel, or decisionExecute.
//
// decision.Set is called with the queueSet locked.
// decision.Get is called without the queueSet locked.
decision promise.WriteOnce
// arrivalTime is the real time when the request entered this system
arrivalTime time.Time
// descr1 and descr2 are not used in any logic but they appear in
// log messages
descr1, descr2 interface{}
queueNoteFn fq.QueueNoteFn
// The preceding fields are filled in at creation and not modified since;
// the following fields may be modified later and must only be accessed while
// holding the queueSet's lock.
// Removes this request from its queue. If the request is not put into a
// a queue it will be nil.
removeFromQueueLocked removeFromFIFOFunc
// arrivalR is R(arrivalTime). R is, confusingly, also called "virtual time".
// This field is meaningful only while the request is waiting in the virtual world.
arrivalR fcrequest.SeatSeconds
// startTime is the real time when the request began executing
startTime time.Time
// Indicates whether client has called Request::Wait()
waitStarted bool
}
type completedWorkEstimate struct {
fcrequest.WorkEstimate
totalWork fcrequest.SeatSeconds // initial plus final work
finalWork fcrequest.SeatSeconds // only final work
}
// queue is a sequence of requests that have arrived but not yet finished
// execution in both the real and virtual worlds.
type queue struct {
// The requests not yet executing in the real world are stored in a FIFO list.
requests fifo
// nextDispatchR is the R progress meter reading at
// which the next request will be dispatched in the virtual world.
nextDispatchR fcrequest.SeatSeconds
// requestsExecuting is the count in the real world.
requestsExecuting int
// index is the position of this queue among those in its queueSet.
index int
// seatsInUse is the total number of "seats" currently occupied
// by all the requests that are currently executing in this queue.
seatsInUse int
}
// queueSum tracks the sum of initial seats, max seats, and
// totalWork from all requests in a given queue
type queueSum struct {
// InitialSeatsSum is the sum of InitialSeats
// associated with all requests in a given queue.
InitialSeatsSum int
// MaxSeatsSum is the sum of MaxSeats
// associated with all requests in a given queue.
MaxSeatsSum int
// TotalWorkSum is the sum of totalWork of the waiting requests
TotalWorkSum fcrequest.SeatSeconds
}
func (req *request) totalWork() fcrequest.SeatSeconds {
return req.workEstimate.totalWork
}
func (qs *queueSet) completeWorkEstimate(we *fcrequest.WorkEstimate) completedWorkEstimate {
finalWork := qs.computeFinalWork(we)
return completedWorkEstimate{
WorkEstimate: *we,
totalWork: qs.computeInitialWork(we) + finalWork,
finalWork: finalWork,
}
}
func (qs *queueSet) computeInitialWork(we *fcrequest.WorkEstimate) fcrequest.SeatSeconds {
return fcrequest.SeatsTimesDuration(float64(we.InitialSeats), qs.estimatedServiceDuration)
}
func (qs *queueSet) computeFinalWork(we *fcrequest.WorkEstimate) fcrequest.SeatSeconds {
return fcrequest.SeatsTimesDuration(float64(we.FinalSeats), we.AdditionalLatency)
}
func (q *queue) dumpLocked(includeDetails bool) debug.QueueDump {
digest := make([]debug.RequestDump, q.requests.Length())
i := 0
q.requests.Walk(func(r *request) bool {
// dump requests.
digest[i].MatchedFlowSchema = r.fsName
digest[i].FlowDistinguisher = r.flowDistinguisher
digest[i].ArriveTime = r.arrivalTime
digest[i].StartTime = r.startTime
digest[i].WorkEstimate = r.workEstimate.WorkEstimate
if includeDetails {
userInfo, _ := genericrequest.UserFrom(r.ctx)
digest[i].UserName = userInfo.GetName()
requestInfo, ok := genericrequest.RequestInfoFrom(r.ctx)
if ok {
digest[i].RequestInfo = *requestInfo
}
}
i++
return true
})
sum := q.requests.QueueSum()
queueSum := debug.QueueSum{
InitialSeatsSum: sum.InitialSeatsSum,
MaxSeatsSum: sum.MaxSeatsSum,
TotalWorkSum: sum.TotalWorkSum.String(),
}
return debug.QueueDump{
NextDispatchR: q.nextDispatchR.String(),
Requests: digest,
ExecutingRequests: q.requestsExecuting,
SeatsInUse: q.seatsInUse,
QueueSum: queueSum,
}
}

View File

@ -0,0 +1,231 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package format
import (
"bytes"
"encoding/json"
"fmt"
flowcontrol "k8s.io/api/flowcontrol/v1beta3"
"k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/endpoints/request"
)
// This file provides an easy way to mark a value for formatting to
// `%s` in full detail IF it is printed but without costing a lot of
// CPU or memory if the value is NOT printed. The API Priority and
// Fairness API objects are formatted into JSON. The other types of
// objects here are formatted into golang source.
// Stringer marks the given value for custom formatting by this package.
type Stringer struct{ val interface{} }
// Fmt marks the given value for custom formatting by this package.
func Fmt(val interface{}) Stringer {
return Stringer{val}
}
// String formats to a string in full detail
func (sr Stringer) String() string {
if sr.val == nil {
return "nil"
}
switch typed := sr.val.(type) {
case *flowcontrol.FlowSchema,
flowcontrol.FlowSchema,
flowcontrol.FlowSchemaSpec,
flowcontrol.FlowDistinguisherMethod,
*flowcontrol.FlowDistinguisherMethod,
*flowcontrol.PolicyRulesWithSubjects,
flowcontrol.PolicyRulesWithSubjects,
flowcontrol.Subject,
flowcontrol.ResourcePolicyRule,
flowcontrol.NonResourcePolicyRule,
flowcontrol.FlowSchemaCondition,
*flowcontrol.PriorityLevelConfiguration,
flowcontrol.PriorityLevelConfiguration,
flowcontrol.PriorityLevelConfigurationSpec,
*flowcontrol.LimitedPriorityLevelConfiguration,
flowcontrol.LimitedPriorityLevelConfiguration,
flowcontrol.LimitResponse,
*flowcontrol.QueuingConfiguration,
flowcontrol.QueuingConfiguration:
return ToJSON(sr.val)
case []user.Info:
return FmtUsers(typed)
case []*request.RequestInfo:
return FmtRequests(typed)
default:
return fmt.Sprintf("%#+v", sr.val)
}
}
// ToJSON converts using encoding/json and handles errors by
// formatting them
func ToJSON(val interface{}) string {
bs, err := json.Marshal(val)
str := string(bs)
if err != nil {
str = str + "<" + err.Error() + ">"
}
return str
}
// FmtPriorityLevelConfiguration returns a golang source expression
// equivalent to the given value
func FmtPriorityLevelConfiguration(pl *flowcontrol.PriorityLevelConfiguration) string {
if pl == nil {
return "nil"
}
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("&flowcontrolv1beta3.PriorityLevelConfiguration{ObjectMeta: %#+v, Spec: ",
pl.ObjectMeta))
BufferPriorityLevelConfigurationSpec(&buf, &pl.Spec)
buf.WriteString(fmt.Sprintf(", Status: %#+v}", pl.Status))
return buf.String()
}
// FmtPriorityLevelConfigurationSpec returns a golang source
// expression equivalent to the given value
func FmtPriorityLevelConfigurationSpec(plSpec *flowcontrol.PriorityLevelConfigurationSpec) string {
var buf bytes.Buffer
BufferPriorityLevelConfigurationSpec(&buf, plSpec)
return buf.String()
}
// BufferPriorityLevelConfigurationSpec writes a golang source
// expression for the given value to the given buffer
func BufferPriorityLevelConfigurationSpec(buf *bytes.Buffer, plSpec *flowcontrol.PriorityLevelConfigurationSpec) {
buf.WriteString(fmt.Sprintf("flowcontrolv1beta3.PriorityLevelConfigurationSpec{Type: %#v", plSpec.Type))
if plSpec.Limited != nil {
buf.WriteString(fmt.Sprintf(", Limited: &flowcontrol.LimitedPriorityLevelConfiguration{NominalConcurrencyShares:%d, LimitResponse:flowcontrol.LimitResponse{Type:%#v", plSpec.Limited.NominalConcurrencyShares, plSpec.Limited.LimitResponse.Type))
if plSpec.Limited.LimitResponse.Queuing != nil {
buf.WriteString(fmt.Sprintf(", Queuing:&%#+v", *plSpec.Limited.LimitResponse.Queuing))
}
buf.WriteString(" } }")
}
buf.WriteString("}")
}
// FmtFlowSchema produces a golang source expression of the value.
func FmtFlowSchema(fs *flowcontrol.FlowSchema) string {
if fs == nil {
return "nil"
}
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("&flowcontrolv1beta3.FlowSchema{ObjectMeta: %#+v, Spec: ",
fs.ObjectMeta))
BufferFlowSchemaSpec(&buf, &fs.Spec)
buf.WriteString(fmt.Sprintf(", Status: %#+v}", fs.Status))
return buf.String()
}
// FmtFlowSchemaSpec produces a golang source expression equivalent to
// the given spec
func FmtFlowSchemaSpec(fsSpec *flowcontrol.FlowSchemaSpec) string {
var buf bytes.Buffer
BufferFlowSchemaSpec(&buf, fsSpec)
return buf.String()
}
// BufferFlowSchemaSpec writes a golang source expression for the
// given value to the given buffer
func BufferFlowSchemaSpec(buf *bytes.Buffer, fsSpec *flowcontrol.FlowSchemaSpec) {
buf.WriteString(fmt.Sprintf("flowcontrolv1beta3.FlowSchemaSpec{PriorityLevelConfiguration: %#+v, MatchingPrecedence: %d, DistinguisherMethod: ",
fsSpec.PriorityLevelConfiguration,
fsSpec.MatchingPrecedence))
if fsSpec.DistinguisherMethod == nil {
buf.WriteString("nil")
} else {
buf.WriteString(fmt.Sprintf("&%#+v", *fsSpec.DistinguisherMethod))
}
buf.WriteString(", Rules: []flowcontrol.PolicyRulesWithSubjects{")
for idx, rule := range fsSpec.Rules {
if idx > 0 {
buf.WriteString(", ")
}
BufferFmtPolicyRulesWithSubjectsSlim(buf, rule)
}
buf.WriteString("}}")
}
// FmtPolicyRulesWithSubjects produces a golang source expression of the value.
func FmtPolicyRulesWithSubjects(rule flowcontrol.PolicyRulesWithSubjects) string {
return "flowcontrolv1beta3.PolicyRulesWithSubjects" + FmtPolicyRulesWithSubjectsSlim(rule)
}
// FmtPolicyRulesWithSubjectsSlim produces a golang source expression
// of the value but without the leading type name. See above for an
// example context where this is useful.
func FmtPolicyRulesWithSubjectsSlim(rule flowcontrol.PolicyRulesWithSubjects) string {
var buf bytes.Buffer
BufferFmtPolicyRulesWithSubjectsSlim(&buf, rule)
return buf.String()
}
// BufferFmtPolicyRulesWithSubjectsSlim writes a golang source
// expression for the given value to the given buffer but excludes the
// leading type name
func BufferFmtPolicyRulesWithSubjectsSlim(buf *bytes.Buffer, rule flowcontrol.PolicyRulesWithSubjects) {
buf.WriteString("{Subjects: []flowcontrolv1beta3.Subject{")
for jdx, subj := range rule.Subjects {
if jdx > 0 {
buf.WriteString(", ")
}
buf.WriteString(fmt.Sprintf("{Kind: %q", subj.Kind))
if subj.User != nil {
buf.WriteString(fmt.Sprintf(", User: &%#+v", *subj.User))
}
if subj.Group != nil {
buf.WriteString(fmt.Sprintf(", Group: &%#+v", *subj.Group))
}
if subj.ServiceAccount != nil {
buf.WriteString(fmt.Sprintf(", ServiceAccount: &%#+v", *subj.ServiceAccount))
}
buf.WriteString("}")
}
buf.WriteString(fmt.Sprintf("}, ResourceRules: %#+v, NonResourceRules: %#+v}", rule.ResourceRules, rule.NonResourceRules))
}
// FmtUsers produces a golang source expression of the value.
func FmtUsers(list []user.Info) string {
var buf bytes.Buffer
buf.WriteString("[]user.Info{")
for idx, member := range list {
if idx > 0 {
buf.WriteString(", ")
}
buf.WriteString(fmt.Sprintf("%#+v", member))
}
buf.WriteString("}")
return buf.String()
}
// FmtRequests produces a golang source expression of the value.
func FmtRequests(list []*request.RequestInfo) string {
var buf bytes.Buffer
buf.WriteString("[]*request.RequestInfo{")
for idx, member := range list {
if idx > 0 {
buf.WriteString(", ")
}
buf.WriteString(fmt.Sprintf("%#+v", member))
}
buf.WriteString("}")
return buf.String()
}

View File

@ -0,0 +1,40 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"fmt"
fcfmt "k8s.io/apiserver/pkg/util/flowcontrol/format"
)
var _ fmt.GoStringer = RequestDigest{}
// GoString produces a golang source expression of the value.
func (rd RequestDigest) GoString() string {
return fmt.Sprintf("RequestDigest{RequestInfo: %#+v, User: %#+v}", rd.RequestInfo, rd.User)
}
var _ fmt.GoStringer = (*priorityLevelState)(nil)
// GoString produces a golang source expression of the value.
func (pls *priorityLevelState) GoString() string {
if pls == nil {
return "nil"
}
return fmt.Sprintf("&priorityLevelState{pl:%s, qsCompleter:%#+v, queues:%#+v, quiescing:%#v, numPending:%d}", fcfmt.Fmt(pls.pl), pls.qsCompleter, pls.queues, pls.quiescing, pls.numPending)
}

View File

@ -0,0 +1,67 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
// Gauge is the methods of a gauge that are used by instrumented code.
type Gauge interface {
Set(float64)
Inc()
Dec()
Add(float64)
SetToCurrentTime()
}
// RatioedGauge tracks ratios.
// The numerator is set/changed through the Gauge methods,
// and the denominator can be updated through the SetDenominator method.
// A ratio is tracked whenever the numerator or denominator is set/changed.
type RatioedGauge interface {
Gauge
// SetDenominator sets the denominator to use until it is changed again
SetDenominator(float64)
}
// RatioedGaugeVec creates related observers that are
// differentiated by a series of label values
type RatioedGaugeVec interface {
// NewForLabelValuesSafe makes a new vector member for the given tuple of label values,
// initialized with the given numerator and denominator.
// Unlike the usual Vec WithLabelValues method, this is intended to be called only
// once per vector member (at the start of its lifecycle).
// The "Safe" part is saying that the returned object will function properly after metric registration
// even if this method is called before registration.
NewForLabelValuesSafe(initialNumerator, initialDenominator float64, labelValues []string) RatioedGauge
}
//////////////////////////////// Pairs ////////////////////////////////
//
// API Priority and Fairness tends to use RatioedGaugeVec members in pairs,
// one for requests waiting in a queue and one for requests being executed.
// The following definitions are a convenience layer that adds support for that
// particular pattern of usage.
// RatioedGaugePair is a corresponding pair of gauges, one for the
// number of requests waiting in queue(s) and one for the number of
// requests being executed.
type RatioedGaugePair struct {
// RequestsWaiting is given observations of the number of currently queued requests
RequestsWaiting RatioedGauge
// RequestsExecuting is given observations of the number of requests currently executing
RequestsExecuting RatioedGauge
}

View File

@ -0,0 +1,600 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"context"
"strconv"
"strings"
"sync"
"time"
epmetrics "k8s.io/apiserver/pkg/endpoints/metrics"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
basemetricstestutil "k8s.io/component-base/metrics/testutil"
)
const (
namespace = "apiserver"
subsystem = "flowcontrol"
)
const (
requestKind = "request_kind"
priorityLevel = "priority_level"
flowSchema = "flow_schema"
phase = "phase"
LabelNamePhase = "phase"
LabelValueWaiting = "waiting"
LabelValueExecuting = "executing"
)
var (
queueLengthBuckets = []float64{0, 10, 25, 50, 100, 250, 500, 1000}
requestDurationSecondsBuckets = []float64{0, 0.005, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
registerMetrics.Do(func() {
for _, metric := range metrics {
legacyregistry.MustRegister(metric)
}
})
}
type resettable interface {
Reset()
}
// Reset all resettable metrics to zero
func Reset() {
for _, metric := range metrics {
if rm, ok := metric.(resettable); ok {
rm.Reset()
}
}
}
// GatherAndCompare the given metrics with the given Prometheus syntax expected value
func GatherAndCompare(expected string, metricNames ...string) error {
return basemetricstestutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), metricNames...)
}
// Registerables is a slice of Registerable
type Registerables []compbasemetrics.Registerable
// Append adds more
func (rs Registerables) Append(more ...compbasemetrics.Registerable) Registerables {
return append(rs, more...)
}
var (
apiserverRejectedRequestsTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rejected_requests_total",
Help: "Number of requests rejected by API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema, "reason"},
)
apiserverDispatchedRequestsTotal = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "dispatched_requests_total",
Help: "Number of requests executed by API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
// PriorityLevelExecutionSeatsGaugeVec creates observers of seats occupied throughout execution for priority levels
PriorityLevelExecutionSeatsGaugeVec = NewTimingRatioHistogramVec(
&compbasemetrics.TimingHistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "priority_level_seat_utilization",
Help: "Observations, at the end of every nanosecond, of utilization of seats for any stage of execution (but only initial stage for WATCHes)",
// Buckets for both 0.99 and 1.0 mean PromQL's histogram_quantile will reveal saturation
Buckets: []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1},
ConstLabels: map[string]string{phase: "executing"},
StabilityLevel: compbasemetrics.ALPHA,
},
priorityLevel,
)
// PriorityLevelConcurrencyGaugeVec creates gauges of concurrency broken down by phase, priority level
PriorityLevelConcurrencyGaugeVec = NewTimingRatioHistogramVec(
&compbasemetrics.TimingHistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "priority_level_request_utilization",
Help: "Observations, at the end of every nanosecond, of number of requests (as a fraction of the relevant limit) waiting or in any stage of execution (but only initial stage for WATCHes)",
// For executing: the denominator will be seats, so this metric will skew low.
// For waiting: total queue capacity is generally quite generous, so this metric will skew low.
Buckets: []float64{0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.25, 0.5, 0.75, 1},
StabilityLevel: compbasemetrics.ALPHA,
},
LabelNamePhase, priorityLevel,
)
// readWriteConcurrencyGaugeVec creates ratioed gauges of requests/limit broken down by phase and mutating vs readonly
readWriteConcurrencyGaugeVec = NewTimingRatioHistogramVec(
&compbasemetrics.TimingHistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "read_vs_write_current_requests",
Help: "Observations, at the end of every nanosecond, of the number of requests (as a fraction of the relevant limit) waiting or in regular stage of execution",
// This metric will skew low for the same reason as the priority level metrics
// and also because APF has a combined limit for mutating and readonly.
Buckets: []float64{0, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1},
StabilityLevel: compbasemetrics.ALPHA,
},
LabelNamePhase, requestKind,
)
apiserverCurrentR = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "current_r",
Help: "R(time of last change)",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverDispatchR = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "dispatch_r",
Help: "R(time of last dispatch)",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverLatestS = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "latest_s",
Help: "S(most recently dispatched request)",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverNextSBounds = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "next_s_bounds",
Help: "min and max, over queues, of S(oldest waiting request in queue)",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, "bound"},
)
apiserverNextDiscountedSBounds = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "next_discounted_s_bounds",
Help: "min and max, over queues, of S(oldest waiting request in queue) - estimated work in progress",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, "bound"},
)
apiserverCurrentInqueueRequests = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "current_inqueue_requests",
Help: "Number of requests currently pending in queues of the API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverRequestQueueLength = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_queue_length_after_enqueue",
Help: "Length of queue in the API Priority and Fairness subsystem, as seen by each request after it is enqueued",
Buckets: queueLengthBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverRequestConcurrencyLimit = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_concurrency_limit",
Help: "Shared concurrency limit in the API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverCurrentExecutingRequests = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "current_executing_requests",
Help: "Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution stage in the API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverRequestConcurrencyInUse = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_concurrency_in_use",
Help: "Concurrency (number of seats) occupied by the currently executing (initial stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness subsystem",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverRequestWaitingSeconds = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_wait_duration_seconds",
Help: "Length of time a request spent waiting in its queue",
Buckets: requestDurationSecondsBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema, "execute"},
)
apiserverRequestExecutionSeconds = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_execution_seconds",
Help: "Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of request execution in the API Priority and Fairness subsystem",
Buckets: requestDurationSecondsBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema, "type"},
)
watchCountSamples = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "watch_count_samples",
Help: "count of watchers for mutating requests in API Priority and Fairness",
Buckets: []float64{0, 1, 10, 100, 1000, 10000},
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverEpochAdvances = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "epoch_advance_total",
Help: "Number of times the queueset's progress meter jumped backward",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, "success"},
)
apiserverWorkEstimatedSeats = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "work_estimated_seats",
Help: "Number of estimated seats (maximum of initial and final seats) associated with requests in API Priority and Fairness",
// the upper bound comes from the maximum number of seats a request
// can occupy which is currently set at 10.
Buckets: []float64{1, 2, 4, 10},
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverDispatchWithNoAccommodation = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_dispatch_no_accommodation_total",
Help: "Number of times a dispatch attempt resulted in a non accommodation due to lack of available seats",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel, flowSchema},
)
apiserverNominalConcurrencyLimits = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "nominal_limit_seats",
Help: "Nominal number of execution seats configured for each priority level",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverMinimumConcurrencyLimits = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "lower_limit_seats",
Help: "Configured lower bound on number of execution seats available to each priority level",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverMaximumConcurrencyLimits = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "upper_limit_seats",
Help: "Configured upper bound on number of execution seats available to each priority level",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
ApiserverSeatDemands = NewTimingRatioHistogramVec(
&compbasemetrics.TimingHistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "demand_seats",
Help: "Observations, at the end of every nanosecond, of (the number of seats each priority level could use) / (nominal number of seats for that level)",
// Rationale for the bucket boundaries:
// For 0--1, evenly spaced and not too many;
// For 1--2, roughly powers of sqrt(sqrt(2));
// For 2--6, roughly powers of sqrt(2);
// We need coverage over 1, but do not want too many buckets.
Buckets: []float64{0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.7, 2, 2.8, 4, 6},
StabilityLevel: compbasemetrics.ALPHA,
},
priorityLevel,
)
apiserverSeatDemandHighWatermarks = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "demand_seats_high_watermark",
Help: "High watermark, over last adjustment period, of demand_seats",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverSeatDemandAverages = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "demand_seats_average",
Help: "Time-weighted average, over last adjustment period, of demand_seats",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverSeatDemandStandardDeviations = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "demand_seats_stdev",
Help: "Time-weighted standard deviation, over last adjustment period, of demand_seats",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverSeatDemandSmootheds = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "demand_seats_smoothed",
Help: "Smoothed seat demands",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverSeatDemandTargets = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "target_seats",
Help: "Seat allocation targets",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
apiserverFairFracs = compbasemetrics.NewGauge(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "seat_fair_frac",
Help: "Fair fraction of server's concurrency to allocate to each priority level that can use it",
StabilityLevel: compbasemetrics.ALPHA,
})
apiserverCurrentConcurrencyLimits = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "current_limit_seats",
Help: "current derived number of execution seats available to each priority level",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{priorityLevel},
)
metrics = Registerables{
apiserverRejectedRequestsTotal,
apiserverDispatchedRequestsTotal,
apiserverCurrentR,
apiserverDispatchR,
apiserverLatestS,
apiserverNextSBounds,
apiserverNextDiscountedSBounds,
apiserverCurrentInqueueRequests,
apiserverRequestQueueLength,
apiserverRequestConcurrencyLimit,
apiserverRequestConcurrencyInUse,
apiserverCurrentExecutingRequests,
apiserverRequestWaitingSeconds,
apiserverRequestExecutionSeconds,
watchCountSamples,
apiserverEpochAdvances,
apiserverWorkEstimatedSeats,
apiserverDispatchWithNoAccommodation,
apiserverNominalConcurrencyLimits,
apiserverMinimumConcurrencyLimits,
apiserverMaximumConcurrencyLimits,
apiserverSeatDemandHighWatermarks,
apiserverSeatDemandAverages,
apiserverSeatDemandStandardDeviations,
apiserverSeatDemandSmootheds,
apiserverSeatDemandTargets,
apiserverFairFracs,
apiserverCurrentConcurrencyLimits,
}.
Append(PriorityLevelExecutionSeatsGaugeVec.metrics()...).
Append(PriorityLevelConcurrencyGaugeVec.metrics()...).
Append(readWriteConcurrencyGaugeVec.metrics()...).
Append(ApiserverSeatDemands.metrics()...)
)
type indexOnce struct {
labelValues []string
once sync.Once
gauge RatioedGauge
}
func (io *indexOnce) getGauge() RatioedGauge {
io.once.Do(func() {
io.gauge = readWriteConcurrencyGaugeVec.NewForLabelValuesSafe(0, 1, io.labelValues)
})
return io.gauge
}
var waitingReadonly = indexOnce{labelValues: []string{LabelValueWaiting, epmetrics.ReadOnlyKind}}
var executingReadonly = indexOnce{labelValues: []string{LabelValueExecuting, epmetrics.ReadOnlyKind}}
var waitingMutating = indexOnce{labelValues: []string{LabelValueWaiting, epmetrics.MutatingKind}}
var executingMutating = indexOnce{labelValues: []string{LabelValueExecuting, epmetrics.MutatingKind}}
// GetWaitingReadonlyConcurrency returns the gauge of number of readonly requests waiting / limit on those.
var GetWaitingReadonlyConcurrency = waitingReadonly.getGauge
// GetExecutingReadonlyConcurrency returns the gauge of number of executing readonly requests / limit on those.
var GetExecutingReadonlyConcurrency = executingReadonly.getGauge
// GetWaitingMutatingConcurrency returns the gauge of number of mutating requests waiting / limit on those.
var GetWaitingMutatingConcurrency = waitingMutating.getGauge
// GetExecutingMutatingConcurrency returns the gauge of number of executing mutating requests / limit on those.
var GetExecutingMutatingConcurrency = executingMutating.getGauge
// AddRequestsInQueues adds the given delta to the gauge of the # of requests in the queues of the specified flowSchema and priorityLevel
func AddRequestsInQueues(ctx context.Context, priorityLevel, flowSchema string, delta int) {
apiserverCurrentInqueueRequests.WithLabelValues(priorityLevel, flowSchema).Add(float64(delta))
}
// AddRequestsExecuting adds the given delta to the gauge of executing requests of the given flowSchema and priorityLevel
func AddRequestsExecuting(ctx context.Context, priorityLevel, flowSchema string, delta int) {
apiserverCurrentExecutingRequests.WithLabelValues(priorityLevel, flowSchema).Add(float64(delta))
}
// SetCurrentR sets the current-R (virtualTime) gauge for the given priority level
func SetCurrentR(priorityLevel string, r float64) {
apiserverCurrentR.WithLabelValues(priorityLevel).Set(r)
}
// SetLatestS sets the latest-S (virtual time of dispatched request) gauge for the given priority level
func SetDispatchMetrics(priorityLevel string, r, s, sMin, sMax, discountedSMin, discountedSMax float64) {
apiserverDispatchR.WithLabelValues(priorityLevel).Set(r)
apiserverLatestS.WithLabelValues(priorityLevel).Set(s)
apiserverNextSBounds.WithLabelValues(priorityLevel, "min").Set(sMin)
apiserverNextSBounds.WithLabelValues(priorityLevel, "max").Set(sMax)
apiserverNextDiscountedSBounds.WithLabelValues(priorityLevel, "min").Set(discountedSMin)
apiserverNextDiscountedSBounds.WithLabelValues(priorityLevel, "max").Set(discountedSMax)
}
// AddRequestConcurrencyInUse adds the given delta to the gauge of concurrency in use by
// the currently executing requests of the given flowSchema and priorityLevel
func AddRequestConcurrencyInUse(priorityLevel, flowSchema string, delta int) {
apiserverRequestConcurrencyInUse.WithLabelValues(priorityLevel, flowSchema).Add(float64(delta))
}
// AddReject increments the # of rejected requests for flow control
func AddReject(ctx context.Context, priorityLevel, flowSchema, reason string) {
apiserverRejectedRequestsTotal.WithContext(ctx).WithLabelValues(priorityLevel, flowSchema, reason).Add(1)
}
// AddDispatch increments the # of dispatched requests for flow control
func AddDispatch(ctx context.Context, priorityLevel, flowSchema string) {
apiserverDispatchedRequestsTotal.WithContext(ctx).WithLabelValues(priorityLevel, flowSchema).Add(1)
}
// ObserveQueueLength observes the queue length for flow control
func ObserveQueueLength(ctx context.Context, priorityLevel, flowSchema string, length int) {
apiserverRequestQueueLength.WithContext(ctx).WithLabelValues(priorityLevel, flowSchema).Observe(float64(length))
}
// ObserveWaitingDuration observes the queue length for flow control
func ObserveWaitingDuration(ctx context.Context, priorityLevel, flowSchema, execute string, waitTime time.Duration) {
apiserverRequestWaitingSeconds.WithContext(ctx).WithLabelValues(priorityLevel, flowSchema, execute).Observe(waitTime.Seconds())
}
// ObserveExecutionDuration observes the execution duration for flow control
func ObserveExecutionDuration(ctx context.Context, priorityLevel, flowSchema string, executionTime time.Duration) {
reqType := "regular"
if requestInfo, ok := apirequest.RequestInfoFrom(ctx); ok && requestInfo.Verb == "watch" {
reqType = requestInfo.Verb
}
apiserverRequestExecutionSeconds.WithContext(ctx).WithLabelValues(priorityLevel, flowSchema, reqType).Observe(executionTime.Seconds())
}
// ObserveWatchCount notes a sampling of a watch count
func ObserveWatchCount(ctx context.Context, priorityLevel, flowSchema string, count int) {
watchCountSamples.WithLabelValues(priorityLevel, flowSchema).Observe(float64(count))
}
// AddEpochAdvance notes an advance of the progress meter baseline for a given priority level
func AddEpochAdvance(ctx context.Context, priorityLevel string, success bool) {
apiserverEpochAdvances.WithContext(ctx).WithLabelValues(priorityLevel, strconv.FormatBool(success)).Inc()
}
// ObserveWorkEstimatedSeats notes a sampling of estimated seats associated with a request
func ObserveWorkEstimatedSeats(priorityLevel, flowSchema string, seats int) {
apiserverWorkEstimatedSeats.WithLabelValues(priorityLevel, flowSchema).Observe(float64(seats))
}
// AddDispatchWithNoAccommodation keeps track of number of times dispatch attempt results
// in a non accommodation due to lack of available seats.
func AddDispatchWithNoAccommodation(priorityLevel, flowSchema string) {
apiserverDispatchWithNoAccommodation.WithLabelValues(priorityLevel, flowSchema).Inc()
}
func SetPriorityLevelConfiguration(priorityLevel string, nominalCL, minCL, maxCL int) {
apiserverRequestConcurrencyLimit.WithLabelValues(priorityLevel).Set(float64(nominalCL))
apiserverNominalConcurrencyLimits.WithLabelValues(priorityLevel).Set(float64(nominalCL))
apiserverMinimumConcurrencyLimits.WithLabelValues(priorityLevel).Set(float64(minCL))
apiserverMaximumConcurrencyLimits.WithLabelValues(priorityLevel).Set(float64(maxCL))
}
func NotePriorityLevelConcurrencyAdjustment(priorityLevel string, seatDemandHWM, seatDemandAvg, seatDemandStdev, seatDemandSmoothed, seatDemandTarget float64, currentCL int) {
apiserverSeatDemandHighWatermarks.WithLabelValues(priorityLevel).Set(seatDemandHWM)
apiserverSeatDemandAverages.WithLabelValues(priorityLevel).Set(seatDemandAvg)
apiserverSeatDemandStandardDeviations.WithLabelValues(priorityLevel).Set(seatDemandStdev)
apiserverSeatDemandSmootheds.WithLabelValues(priorityLevel).Set(seatDemandSmoothed)
apiserverSeatDemandTargets.WithLabelValues(priorityLevel).Set(seatDemandTarget)
apiserverCurrentConcurrencyLimits.WithLabelValues(priorityLevel).Set(float64(currentCL))
}
func SetFairFrac(fairFrac float64) {
apiserverFairFracs.Set(fairFrac)
}

View File

@ -0,0 +1,225 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"context"
"sync"
"time"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/klog/v2"
)
// TimingRatioHistogram is essentially a gauge for a ratio where the client
// independently controls the numerator and denominator.
// When scraped it produces a histogram of samples of the ratio
// taken at the end of every nanosecond.
// `*TimingRatioHistogram` implements both Registerable and RatioedGauge.
type TimingRatioHistogram struct {
// The implementation is layered on TimingHistogram,
// adding the division by an occasionally adjusted denominator.
// Registerable is the registerable aspect.
// That is the registerable aspect of the underlying TimingHistogram.
compbasemetrics.Registerable
// timingRatioHistogramInner implements the RatioedGauge aspect.
timingRatioHistogramInner
}
// TimingRatioHistogramOpts is the constructor parameters of a TimingRatioHistogram.
// The `TimingHistogramOpts.InitialValue` is the initial numerator.
type TimingRatioHistogramOpts struct {
compbasemetrics.TimingHistogramOpts
InitialDenominator float64
}
// timingRatioHistogramInner implements the instrumentation aspect
type timingRatioHistogramInner struct {
nowFunc func() time.Time
getGaugeOfRatio func() Gauge
sync.Mutex
// access only with mutex locked
numerator, denominator float64
}
var _ RatioedGauge = &timingRatioHistogramInner{}
var _ RatioedGauge = &TimingRatioHistogram{}
var _ compbasemetrics.Registerable = &TimingRatioHistogram{}
// NewTimingHistogram returns an object which is TimingHistogram-like. However, nothing
// will be measured until the histogram is registered in at least one registry.
func NewTimingRatioHistogram(opts *TimingRatioHistogramOpts) *TimingRatioHistogram {
return NewTestableTimingRatioHistogram(time.Now, opts)
}
// NewTestableTimingHistogram adds injection of the clock
func NewTestableTimingRatioHistogram(nowFunc func() time.Time, opts *TimingRatioHistogramOpts) *TimingRatioHistogram {
ratioedOpts := opts.TimingHistogramOpts
ratioedOpts.InitialValue /= opts.InitialDenominator
th := compbasemetrics.NewTestableTimingHistogram(nowFunc, &ratioedOpts)
return &TimingRatioHistogram{
Registerable: th,
timingRatioHistogramInner: timingRatioHistogramInner{
nowFunc: nowFunc,
getGaugeOfRatio: func() Gauge { return th },
numerator: opts.InitialValue,
denominator: opts.InitialDenominator,
}}
}
func (trh *timingRatioHistogramInner) Set(numerator float64) {
trh.Lock()
defer trh.Unlock()
trh.numerator = numerator
ratio := numerator / trh.denominator
trh.getGaugeOfRatio().Set(ratio)
}
func (trh *timingRatioHistogramInner) Add(deltaNumerator float64) {
trh.Lock()
defer trh.Unlock()
numerator := trh.numerator + deltaNumerator
trh.numerator = numerator
ratio := numerator / trh.denominator
trh.getGaugeOfRatio().Set(ratio)
}
func (trh *timingRatioHistogramInner) Sub(deltaNumerator float64) {
trh.Add(-deltaNumerator)
}
func (trh *timingRatioHistogramInner) Inc() {
trh.Add(1)
}
func (trh *timingRatioHistogramInner) Dec() {
trh.Add(-1)
}
func (trh *timingRatioHistogramInner) SetToCurrentTime() {
trh.Set(float64(trh.nowFunc().Sub(time.Unix(0, 0))))
}
func (trh *timingRatioHistogramInner) SetDenominator(denominator float64) {
trh.Lock()
defer trh.Unlock()
trh.denominator = denominator
ratio := trh.numerator / denominator
trh.getGaugeOfRatio().Set(ratio)
}
// WithContext allows the normal TimingHistogram metric to pass in context.
// The context is no-op at the current level of development.
func (trh *timingRatioHistogramInner) WithContext(ctx context.Context) RatioedGauge {
return trh
}
// TimingRatioHistogramVec is a collection of TimingRatioHistograms that differ
// only in label values.
// `*TimingRatioHistogramVec` implements both Registerable and RatioedGaugeVec.
type TimingRatioHistogramVec struct {
// promote only the Registerable methods
compbasemetrics.Registerable
// delegate is TimingHistograms of the ratio
delegate compbasemetrics.GaugeVecMetric
}
var _ RatioedGaugeVec = &TimingRatioHistogramVec{}
var _ compbasemetrics.Registerable = &TimingRatioHistogramVec{}
// NewTimingHistogramVec constructs a new vector.
// `opts.InitialValue` is the initial ratio, but this applies
// only for the tiny period of time until NewForLabelValuesSafe sets
// the ratio based on the given initial numerator and denominator.
// Thus there is a tiny splinter of time during member construction when
// its underlying TimingHistogram is given the initial numerator rather than
// the initial ratio (which is obviously a non-issue when both are zero).
// Note the difficulties associated with extracting a member
// before registering the vector.
func NewTimingRatioHistogramVec(opts *compbasemetrics.TimingHistogramOpts, labelNames ...string) *TimingRatioHistogramVec {
return NewTestableTimingRatioHistogramVec(time.Now, opts, labelNames...)
}
// NewTestableTimingHistogramVec adds injection of the clock.
func NewTestableTimingRatioHistogramVec(nowFunc func() time.Time, opts *compbasemetrics.TimingHistogramOpts, labelNames ...string) *TimingRatioHistogramVec {
delegate := compbasemetrics.NewTestableTimingHistogramVec(nowFunc, opts, labelNames)
return &TimingRatioHistogramVec{
Registerable: delegate,
delegate: delegate,
}
}
func (v *TimingRatioHistogramVec) metrics() Registerables {
return Registerables{v}
}
// NewForLabelValuesChecked will return an error if this vec is not hidden and not yet registered
// or there is a syntactic problem with the labelValues.
func (v *TimingRatioHistogramVec) NewForLabelValuesChecked(initialNumerator, initialDenominator float64, labelValues []string) (RatioedGauge, error) {
underMember, err := v.delegate.WithLabelValuesChecked(labelValues...)
if err != nil {
return noopRatioed{}, err
}
underMember.Set(initialNumerator / initialDenominator)
return &timingRatioHistogramInner{
getGaugeOfRatio: func() Gauge { return underMember },
numerator: initialNumerator,
denominator: initialDenominator,
}, nil
}
// NewForLabelValuesSafe is the same as NewForLabelValuesChecked in cases where that does not
// return an error. When the unsafe version returns an error due to the vector not being
// registered yet, the safe version returns an object that implements its methods
// by looking up the relevant vector member in each call (thus getting a non-noop after registration).
// In the other error cases the object returned here is a noop.
func (v *TimingRatioHistogramVec) NewForLabelValuesSafe(initialNumerator, initialDenominator float64, labelValues []string) RatioedGauge {
tro, err := v.NewForLabelValuesChecked(initialNumerator, initialDenominator, labelValues)
if err == nil {
klog.V(3).InfoS("TimingRatioHistogramVec.NewForLabelValuesSafe hit the efficient case", "fqName", v.FQName(), "labelValues", labelValues)
return tro
}
if !compbasemetrics.ErrIsNotRegistered(err) {
klog.ErrorS(err, "Failed to extract TimingRatioHistogramVec member, using noop instead", "vectorname", v.FQName(), "labelValues", labelValues)
return tro
}
klog.V(3).InfoS("TimingRatioHistogramVec.NewForLabelValuesSafe hit the inefficient case", "fqName", v.FQName(), "labelValues", labelValues)
// At this point we know v.NewForLabelValuesChecked(..) returns a permanent noop,
// which we precisely want to avoid using. Instead, make our own gauge that
// fetches the element on every Set.
return &timingRatioHistogramInner{
getGaugeOfRatio: func() Gauge { return v.delegate.WithLabelValues(labelValues...) },
numerator: initialNumerator,
denominator: initialDenominator,
}
}
type noopRatioed struct{}
func (noopRatioed) Set(float64) {}
func (noopRatioed) Add(float64) {}
func (noopRatioed) Sub(float64) {}
func (noopRatioed) Inc() {}
func (noopRatioed) Dec() {}
func (noopRatioed) SetToCurrentTime() {}
func (noopRatioed) SetDenominator(float64) {}
func (v *TimingRatioHistogramVec) Reset() {
v.delegate.Reset()
}

View File

@ -0,0 +1,56 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
type unionGauge []Gauge
var _ Gauge = unionGauge(nil)
// NewUnionGauge constructs a Gauge that delegates to all of the given Gauges
func NewUnionGauge(elts ...Gauge) Gauge {
return unionGauge(elts)
}
func (ug unionGauge) Set(x float64) {
for _, gauge := range ug {
gauge.Set(x)
}
}
func (ug unionGauge) Add(x float64) {
for _, gauge := range ug {
gauge.Add(x)
}
}
func (ug unionGauge) Inc() {
for _, gauge := range ug {
gauge.Inc()
}
}
func (ug unionGauge) Dec() {
for _, gauge := range ug {
gauge.Dec()
}
}
func (ug unionGauge) SetToCurrentTime() {
for _, gauge := range ug {
gauge.SetToCurrentTime()
}
}

View File

@ -0,0 +1,25 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
// RatioedGaugeVecPhasedElementPair extracts a pair of elements that differ in handling phase
func RatioedGaugeVecPhasedElementPair(vec RatioedGaugeVec, initialWaitingDenominator, initialExecutingDenominator float64, labelValues []string) RatioedGaugePair {
return RatioedGaugePair{
RequestsWaiting: vec.NewForLabelValuesSafe(0, initialWaitingDenominator, append([]string{LabelValueWaiting}, labelValues...)),
RequestsExecuting: vec.NewForLabelValuesSafe(0, initialExecutingDenominator, append([]string{LabelValueExecuting}, labelValues...)),
}
}

View File

@ -0,0 +1,92 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
minimumSeats = 1
maximumSeats = 10
objectsPerSeat = 100.0
watchesPerSeat = 10.0
enableMutatingWorkEstimator = true
)
var eventAdditionalDuration = 5 * time.Millisecond
// WorkEstimatorConfig holds work estimator parameters.
type WorkEstimatorConfig struct {
*ListWorkEstimatorConfig `json:"listWorkEstimatorConfig,omitempty"`
*MutatingWorkEstimatorConfig `json:"mutatingWorkEstimatorConfig,omitempty"`
// MinimumSeats is the minimum number of seats a request must occupy.
MinimumSeats uint64 `json:"minimumSeats,omitempty"`
// MaximumSeats is the maximum number of seats a request can occupy
//
// NOTE: work_estimate_seats_samples metric uses the value of maximumSeats
// as the upper bound, so when we change maximumSeats we should also
// update the buckets of the metric.
MaximumSeats uint64 `json:"maximumSeats,omitempty"`
}
// ListWorkEstimatorConfig holds work estimator parameters related to list requests.
type ListWorkEstimatorConfig struct {
ObjectsPerSeat float64 `json:"objectsPerSeat,omitempty"`
}
// MutatingWorkEstimatorConfig holds work estimator
// parameters related to watches of mutating objects.
type MutatingWorkEstimatorConfig struct {
// TODO(wojtekt): Remove it once we tune the algorithm to not fail
// scalability tests.
Enabled bool `json:"enable,omitempty"`
EventAdditionalDuration metav1.Duration `json:"eventAdditionalDurationMs,omitempty"`
WatchesPerSeat float64 `json:"watchesPerSeat,omitempty"`
}
// DefaultWorkEstimatorConfig creates a new WorkEstimatorConfig with default values.
func DefaultWorkEstimatorConfig() *WorkEstimatorConfig {
return &WorkEstimatorConfig{
MinimumSeats: minimumSeats,
MaximumSeats: maximumSeats,
ListWorkEstimatorConfig: defaultListWorkEstimatorConfig(),
MutatingWorkEstimatorConfig: defaultMutatingWorkEstimatorConfig(),
}
}
// defaultListWorkEstimatorConfig creates a new ListWorkEstimatorConfig with default values.
func defaultListWorkEstimatorConfig() *ListWorkEstimatorConfig {
return &ListWorkEstimatorConfig{ObjectsPerSeat: objectsPerSeat}
}
// defaultMutatingWorkEstimatorConfig creates a new MutatingWorkEstimatorConfig with default values.
func defaultMutatingWorkEstimatorConfig() *MutatingWorkEstimatorConfig {
return &MutatingWorkEstimatorConfig{
Enabled: enableMutatingWorkEstimator,
EventAdditionalDuration: metav1.Duration{Duration: eventAdditionalDuration},
WatchesPerSeat: watchesPerSeat,
}
}
// eventAdditionalDuration converts eventAdditionalDurationMs to a time.Duration type.
func (c *MutatingWorkEstimatorConfig) eventAdditionalDuration() time.Duration {
return c.EventAdditionalDuration.Duration
}

View File

@ -0,0 +1,154 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"math"
"net/http"
"net/url"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
)
func newListWorkEstimator(countFn objectCountGetterFunc, config *WorkEstimatorConfig) WorkEstimatorFunc {
estimator := &listWorkEstimator{
config: config,
countGetterFn: countFn,
}
return estimator.estimate
}
type listWorkEstimator struct {
config *WorkEstimatorConfig
countGetterFn objectCountGetterFunc
}
func (e *listWorkEstimator) estimate(r *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate {
requestInfo, ok := apirequest.RequestInfoFrom(r.Context())
if !ok {
// no RequestInfo should never happen, but to be on the safe side
// let's return maximumSeats
return WorkEstimate{InitialSeats: e.config.MaximumSeats}
}
if requestInfo.Name != "" {
// Requests with metadata.name specified are usually executed as get
// requests in storage layer so their width should be 1.
// Example of such list requests:
// /apis/certificates.k8s.io/v1/certificatesigningrequests?fieldSelector=metadata.name%3Dcsr-xxs4m
// /api/v1/namespaces/test/configmaps?fieldSelector=metadata.name%3Dbig-deployment-1&limit=500&resourceVersion=0
return WorkEstimate{InitialSeats: e.config.MinimumSeats}
}
query := r.URL.Query()
listOptions := metav1.ListOptions{}
if err := metav1.Convert_url_Values_To_v1_ListOptions(&query, &listOptions, nil); err != nil {
klog.ErrorS(err, "Failed to convert options while estimating work for the list request")
// This request is destined to fail in the validation layer,
// return maximumSeats for this request to be consistent.
return WorkEstimate{InitialSeats: e.config.MaximumSeats}
}
isListFromCache := !shouldListFromStorage(query, &listOptions)
numStored, err := e.countGetterFn(key(requestInfo))
switch {
case err == ObjectCountStaleErr:
// object count going stale is indicative of degradation, so we should
// be conservative here and allocate maximum seats to this list request.
// NOTE: if a CRD is removed, its count will go stale first and then the
// pruner will eventually remove the CRD from the cache.
return WorkEstimate{InitialSeats: e.config.MaximumSeats}
case err == ObjectCountNotFoundErr:
// there are multiple scenarios in which we can see this error:
// a. the type is truly unknown, a typo on the caller's part.
// b. the count has gone stale for too long and the pruner
// has removed the type from the cache.
// c. the type is an aggregated resource that is served by a
// different apiserver (thus its object count is not updated)
// we don't have a way to distinguish between those situations.
// However, in case c, the request is delegated to a different apiserver,
// and thus its cost for our server is minimal. To avoid the situation
// when aggregated API calls are overestimated, we allocate the minimum
// possible seats (see #109106 as an example when being more conservative
// led to problems).
return WorkEstimate{InitialSeats: e.config.MinimumSeats}
case err != nil:
// we should never be here since Get returns either ObjectCountStaleErr or
// ObjectCountNotFoundErr, return maximumSeats to be on the safe side.
klog.ErrorS(err, "Unexpected error from object count tracker")
return WorkEstimate{InitialSeats: e.config.MaximumSeats}
}
limit := numStored
if utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking) && listOptions.Limit > 0 &&
listOptions.Limit < numStored {
limit = listOptions.Limit
}
var estimatedObjectsToBeProcessed int64
switch {
case isListFromCache:
// TODO: For resources that implement indexes at the watchcache level,
// we need to adjust the cost accordingly
estimatedObjectsToBeProcessed = numStored
case listOptions.FieldSelector != "" || listOptions.LabelSelector != "":
estimatedObjectsToBeProcessed = numStored + limit
default:
estimatedObjectsToBeProcessed = 2 * limit
}
// for now, our rough estimate is to allocate one seat to each 100 obejcts that
// will be processed by the list request.
// we will come up with a different formula for the transformation function and/or
// fine tune this number in future iteratons.
seats := uint64(math.Ceil(float64(estimatedObjectsToBeProcessed) / e.config.ObjectsPerSeat))
// make sure we never return a seat of zero
if seats < e.config.MinimumSeats {
seats = e.config.MinimumSeats
}
if seats > e.config.MaximumSeats {
seats = e.config.MaximumSeats
}
return WorkEstimate{InitialSeats: seats}
}
func key(requestInfo *apirequest.RequestInfo) string {
groupResource := &schema.GroupResource{
Group: requestInfo.APIGroup,
Resource: requestInfo.Resource,
}
return groupResource.String()
}
// NOTICE: Keep in sync with shouldDelegateList function in
//
// staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher.go
func shouldListFromStorage(query url.Values, opts *metav1.ListOptions) bool {
resourceVersion := opts.ResourceVersion
pagingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.APIListChunking)
hasContinuation := pagingEnabled && len(opts.Continue) > 0
hasLimit := pagingEnabled && opts.Limit > 0 && resourceVersion != "0"
return resourceVersion == "" || hasContinuation || hasLimit || opts.ResourceVersionMatch == metav1.ResourceVersionMatchExact
}

View File

@ -0,0 +1,149 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"math"
"net/http"
"time"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
)
func newMutatingWorkEstimator(countFn watchCountGetterFunc, config *WorkEstimatorConfig) WorkEstimatorFunc {
estimator := &mutatingWorkEstimator{
config: config,
countFn: countFn,
}
return estimator.estimate
}
type mutatingWorkEstimator struct {
config *WorkEstimatorConfig
countFn watchCountGetterFunc
}
func (e *mutatingWorkEstimator) estimate(r *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate {
// TODO(wojtekt): Remove once we tune the algorithm to not fail
// scalability tests.
if !e.config.Enabled {
return WorkEstimate{
InitialSeats: 1,
}
}
requestInfo, ok := apirequest.RequestInfoFrom(r.Context())
if !ok {
// no RequestInfo should never happen, but to be on the safe side
// let's return a large value.
return WorkEstimate{
InitialSeats: 1,
FinalSeats: e.config.MaximumSeats,
AdditionalLatency: e.config.eventAdditionalDuration(),
}
}
if isRequestExemptFromWatchEvents(requestInfo) {
return WorkEstimate{
InitialSeats: e.config.MinimumSeats,
FinalSeats: 0,
AdditionalLatency: time.Duration(0),
}
}
watchCount := e.countFn(requestInfo)
metrics.ObserveWatchCount(r.Context(), priorityLevelName, flowSchemaName, watchCount)
// The cost of the request associated with the watchers of that event
// consists of three parts:
// - cost of going through the event change logic
// - cost of serialization of the event
// - cost of processing an event object for each watcher (e.g. filtering,
// sending data over network)
// We're starting simple to get some operational experience with it and
// we will work on tuning the algorithm later. Given that the actual work
// associated with processing watch events is happening in multiple
// goroutines (proportional to the number of watchers) that are all
// resumed at once, as a starting point we assume that each such goroutine
// is taking 1/Nth of a seat for M milliseconds.
// We allow the accounting of that work in P&F to be reshaped into another
// rectangle of equal area for practical reasons.
var finalSeats uint64
var additionalLatency time.Duration
// TODO: Make this unconditional after we tune the algorithm better.
// Technically, there is an overhead connected to processing an event after
// the request finishes even if there is a small number of watches.
// However, until we tune the estimation we want to stay on the safe side
// an avoid introducing additional latency for almost every single request.
if watchCount >= int(e.config.WatchesPerSeat) {
// TODO: As described in the KEP, we should take into account that not all
// events are equal and try to estimate the cost of a single event based on
// some historical data about size of events.
finalSeats = uint64(math.Ceil(float64(watchCount) / e.config.WatchesPerSeat))
finalWork := SeatsTimesDuration(float64(finalSeats), e.config.eventAdditionalDuration())
// While processing individual events is highly parallel,
// the design/implementation of P&F has a couple limitations that
// make using this assumption in the P&F implementation very
// inefficient because:
// - we reserve max(initialSeats, finalSeats) for time of executing
// both phases of the request
// - even more importantly, when a given `wide` request is the one to
// be dispatched, we are not dispatching any other request until
// we accumulate enough seats to dispatch the nominated one, even
// if currently unoccupied seats would allow for dispatching some
// other requests in the meantime
// As a consequence of these, the wider the request, the more capacity
// will effectively be blocked and unused during dispatching and
// executing this request.
//
// To mitigate the impact of it, we're capping the maximum number of
// seats that can be assigned to a given request. Thanks to it:
// 1) we reduce the amount of seat-seconds that are "wasted" during
// dispatching and executing initial phase of the request
// 2) we are not changing the finalWork estimate - just potentially
// reshaping it to be narrower and longer. As long as the maximum
// seats setting will prevent dispatching too many requests at once
// to prevent overloading kube-apiserver (and/or etcd or the VM or
// a physical machine it is running on), we believe the relaxed
// version should be good enough to achieve the P&F goals.
//
// TODO: Confirm that the current cap of maximumSeats allow us to
// achieve the above.
if finalSeats > e.config.MaximumSeats {
finalSeats = e.config.MaximumSeats
}
additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
}
return WorkEstimate{
InitialSeats: 1,
FinalSeats: finalSeats,
AdditionalLatency: additionalLatency,
}
}
func isRequestExemptFromWatchEvents(requestInfo *apirequest.RequestInfo) bool {
// Creating token for service account does not produce any event,
// but still serviceaccounts can have multiple watchers.
if requestInfo.Resource == "serviceaccounts" && requestInfo.Subresource == "token" {
return true
}
return false
}

View File

@ -0,0 +1,169 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"errors"
"sync"
"time"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
const (
// type deletion (it applies mostly to CRD) is not a very frequent
// operation so we can afford to prune the cache at a large interval.
// at the same time, we also want to make sure that the scalability
// tests hit this code path.
pruneInterval = 1 * time.Hour
// the storage layer polls for object count at every 1m interval, we will allow
// up to 2-3 transient failures to get the latest count for a given resource.
staleTolerationThreshold = 3 * time.Minute
)
var (
// ObjectCountNotFoundErr is returned when the object count for
// a given resource is not being tracked.
ObjectCountNotFoundErr = errors.New("object count not found for the given resource")
// ObjectCountStaleErr is returned when the object count for a
// given resource has gone stale due to transient failures.
ObjectCountStaleErr = errors.New("object count has gone stale for the given resource")
)
// StorageObjectCountTracker is an interface that is used to keep track of
// of the total number of objects for each resource.
// {group}.{resource} is used as the key name to update and retrieve
// the total number of objects for a given resource.
type StorageObjectCountTracker interface {
// Set is invoked to update the current number of total
// objects for the given resource
Set(string, int64)
// Get returns the total number of objects for the given resource.
// The following errors are returned:
// - if the count has gone stale for a given resource due to transient
// failures ObjectCountStaleErr is returned.
// - if the given resource is not being tracked then
// ObjectCountNotFoundErr is returned.
Get(string) (int64, error)
// RunUntil starts all the necessary maintenance.
RunUntil(stopCh <-chan struct{})
}
// NewStorageObjectCountTracker returns an instance of
// StorageObjectCountTracker interface that can be used to
// keep track of the total number of objects for each resource.
func NewStorageObjectCountTracker() StorageObjectCountTracker {
return &objectCountTracker{
clock: &clock.RealClock{},
counts: map[string]*timestampedCount{},
}
}
// timestampedCount stores the count of a given resource with a last updated
// timestamp so we can prune it after it goes stale for certain threshold.
type timestampedCount struct {
count int64
lastUpdatedAt time.Time
}
// objectCountTracker implements StorageObjectCountTracker with
// reader/writer mutual exclusion lock.
type objectCountTracker struct {
clock clock.PassiveClock
lock sync.RWMutex
counts map[string]*timestampedCount
}
func (t *objectCountTracker) Set(groupResource string, count int64) {
if count <= -1 {
// a value of -1 indicates that the 'Count' call failed to contact
// the storage layer, in most cases this error can be transient.
// we will continue to work with the count that is in the cache
// up to a certain threshold defined by staleTolerationThreshold.
// in case this becomes a non transient error then the count for
// the given resource will will eventually be removed from
// the cache by the pruner.
return
}
now := t.clock.Now()
// lock for writing
t.lock.Lock()
defer t.lock.Unlock()
if item, ok := t.counts[groupResource]; ok {
item.count = count
item.lastUpdatedAt = now
return
}
t.counts[groupResource] = &timestampedCount{
count: count,
lastUpdatedAt: now,
}
}
func (t *objectCountTracker) Get(groupResource string) (int64, error) {
staleThreshold := t.clock.Now().Add(-staleTolerationThreshold)
t.lock.RLock()
defer t.lock.RUnlock()
if item, ok := t.counts[groupResource]; ok {
if item.lastUpdatedAt.Before(staleThreshold) {
return item.count, ObjectCountStaleErr
}
return item.count, nil
}
return 0, ObjectCountNotFoundErr
}
// RunUntil runs all the necessary maintenance.
func (t *objectCountTracker) RunUntil(stopCh <-chan struct{}) {
wait.PollUntil(
pruneInterval,
func() (bool, error) {
// always prune at every pruneInterval
return false, t.prune(pruneInterval)
}, stopCh)
klog.InfoS("StorageObjectCountTracker pruner is exiting")
}
func (t *objectCountTracker) prune(threshold time.Duration) error {
oldestLastUpdatedAtAllowed := t.clock.Now().Add(-threshold)
// lock for writing
t.lock.Lock()
defer t.lock.Unlock()
for groupResource, count := range t.counts {
if count.lastUpdatedAt.After(oldestLastUpdatedAtAllowed) {
continue
}
delete(t.counts, groupResource)
}
return nil
}

View File

@ -0,0 +1,65 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"fmt"
"math"
"time"
)
// SeatSeconds is a measure of work, in units of seat-seconds, using a fixed-point representation.
// `SeatSeconds(n)` represents `n/ssScale` seat-seconds.
// The `ssScale` constant is private to the implementation here,
// no other code should use it.
type SeatSeconds uint64
// MaxSeatsSeconds is the maximum representable value of SeatSeconds
const MaxSeatSeconds = SeatSeconds(math.MaxUint64)
// MinSeatSeconds is the lowest representable value of SeatSeconds
const MinSeatSeconds = SeatSeconds(0)
// SeatsTimeDuration produces the SeatSeconds value for the given factors.
// This is intended only to produce small values, increments in work
// rather than amount of work done since process start.
func SeatsTimesDuration(seats float64, duration time.Duration) SeatSeconds {
return SeatSeconds(math.Round(seats * float64(duration/time.Nanosecond) / (1e9 / ssScale)))
}
// ToFloat converts to a floating-point representation.
// This conversion may lose precision.
func (ss SeatSeconds) ToFloat() float64 {
return float64(ss) / ssScale
}
// DurationPerSeat returns duration per seat.
// This division may lose precision.
func (ss SeatSeconds) DurationPerSeat(seats float64) time.Duration {
return time.Duration(float64(ss) / seats * (float64(time.Second) / ssScale))
}
// String converts to a string.
// This is suitable for large as well as small values.
func (ss SeatSeconds) String() string {
const div = SeatSeconds(ssScale)
quo := ss / div
rem := ss - quo*div
return fmt.Sprintf("%d.%08dss", quo, rem)
}
const ssScale = 1e8

View File

@ -0,0 +1,113 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package request
import (
"fmt"
"net/http"
"time"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/klog/v2"
)
// WorkEstimate carries three of the four parameters that determine the work in a request.
// The fourth parameter is the duration of the initial phase of execution.
type WorkEstimate struct {
// InitialSeats is the number of seats occupied while the server is
// executing this request.
InitialSeats uint64
// FinalSeats is the number of seats occupied at the end,
// during the AdditionalLatency.
FinalSeats uint64
// AdditionalLatency specifies the additional duration the seats allocated
// to this request must be reserved after the given request had finished.
// AdditionalLatency should not have any impact on the user experience, the
// caller must not experience this additional latency.
AdditionalLatency time.Duration
}
// MaxSeats returns the maximum number of seats the request occupies over the
// phases of being served.
func (we *WorkEstimate) MaxSeats() int {
if we.InitialSeats >= we.FinalSeats {
return int(we.InitialSeats)
}
return int(we.FinalSeats)
}
// objectCountGetterFunc represents a function that gets the total
// number of objects for a given resource.
type objectCountGetterFunc func(string) (int64, error)
// watchCountGetterFunc represents a function that gets the total
// number of watchers potentially interested in a given request.
type watchCountGetterFunc func(*apirequest.RequestInfo) int
// NewWorkEstimator estimates the work that will be done by a given request,
// if no WorkEstimatorFunc matches the given request then the default
// work estimate of 1 seat is allocated to the request.
func NewWorkEstimator(objectCountFn objectCountGetterFunc, watchCountFn watchCountGetterFunc, config *WorkEstimatorConfig) WorkEstimatorFunc {
estimator := &workEstimator{
minimumSeats: config.MinimumSeats,
maximumSeats: config.MaximumSeats,
listWorkEstimator: newListWorkEstimator(objectCountFn, config),
mutatingWorkEstimator: newMutatingWorkEstimator(watchCountFn, config),
}
return estimator.estimate
}
// WorkEstimatorFunc returns the estimated work of a given request.
// This function will be used by the Priority & Fairness filter to
// estimate the work of of incoming requests.
type WorkEstimatorFunc func(request *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate
func (e WorkEstimatorFunc) EstimateWork(r *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate {
return e(r, flowSchemaName, priorityLevelName)
}
type workEstimator struct {
// the minimum number of seats a request must occupy
minimumSeats uint64
// the maximum number of seats a request can occupy
maximumSeats uint64
// listWorkEstimator estimates work for list request(s)
listWorkEstimator WorkEstimatorFunc
// mutatingWorkEstimator calculates the width of mutating request(s)
mutatingWorkEstimator WorkEstimatorFunc
}
func (e *workEstimator) estimate(r *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate {
requestInfo, ok := apirequest.RequestInfoFrom(r.Context())
if !ok {
klog.ErrorS(fmt.Errorf("no RequestInfo found in context"), "Failed to estimate work for the request", "URI", r.RequestURI)
// no RequestInfo should never happen, but to be on the safe side let's return maximumSeats
return WorkEstimate{InitialSeats: e.maximumSeats}
}
switch requestInfo.Verb {
case "list":
return e.listWorkEstimator.EstimateWork(r, flowSchemaName, priorityLevelName)
case "create", "update", "patch", "delete":
return e.mutatingWorkEstimator.EstimateWork(r, flowSchemaName, priorityLevelName)
}
return WorkEstimate{InitialSeats: e.minimumSeats}
}

203
vendor/k8s.io/apiserver/pkg/util/flowcontrol/rule.go generated vendored Normal file
View File

@ -0,0 +1,203 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"strings"
flowcontrol "k8s.io/api/flowcontrol/v1beta3"
"k8s.io/apiserver/pkg/authentication/serviceaccount"
"k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/endpoints/request"
)
// Tests whether a given request and FlowSchema match. Nobody mutates
// either input.
func matchesFlowSchema(digest RequestDigest, flowSchema *flowcontrol.FlowSchema) bool {
for _, policyRule := range flowSchema.Spec.Rules {
if matchesPolicyRule(digest, &policyRule) {
return true
}
}
return false
}
func matchesPolicyRule(digest RequestDigest, policyRule *flowcontrol.PolicyRulesWithSubjects) bool {
if !matchesASubject(digest.User, policyRule.Subjects) {
return false
}
if digest.RequestInfo.IsResourceRequest {
return matchesAResourceRule(digest.RequestInfo, policyRule.ResourceRules)
}
return matchesANonResourceRule(digest.RequestInfo, policyRule.NonResourceRules)
}
func matchesASubject(user user.Info, subjects []flowcontrol.Subject) bool {
for _, subject := range subjects {
if matchesSubject(user, subject) {
return true
}
}
return false
}
func matchesSubject(user user.Info, subject flowcontrol.Subject) bool {
switch subject.Kind {
case flowcontrol.SubjectKindUser:
return subject.User != nil && (subject.User.Name == flowcontrol.NameAll || subject.User.Name == user.GetName())
case flowcontrol.SubjectKindGroup:
if subject.Group == nil {
return false
}
seek := subject.Group.Name
if seek == "*" {
return true
}
for _, userGroup := range user.GetGroups() {
if userGroup == seek {
return true
}
}
return false
case flowcontrol.SubjectKindServiceAccount:
if subject.ServiceAccount == nil {
return false
}
if subject.ServiceAccount.Name == flowcontrol.NameAll {
return serviceAccountMatchesNamespace(subject.ServiceAccount.Namespace, user.GetName())
}
return serviceaccount.MatchesUsername(subject.ServiceAccount.Namespace, subject.ServiceAccount.Name, user.GetName())
default:
return false
}
}
// serviceAccountMatchesNamespace checks whether the provided service account username matches the namespace, without
// allocating. Use this when checking a service account namespace against a known string.
// This is copied from `k8s.io/apiserver/pkg/authentication/serviceaccount::MatchesUsername` and simplified to not check the name part.
func serviceAccountMatchesNamespace(namespace string, username string) bool {
const (
ServiceAccountUsernamePrefix = "system:serviceaccount:"
ServiceAccountUsernameSeparator = ":"
)
if !strings.HasPrefix(username, ServiceAccountUsernamePrefix) {
return false
}
username = username[len(ServiceAccountUsernamePrefix):]
if !strings.HasPrefix(username, namespace) {
return false
}
username = username[len(namespace):]
return strings.HasPrefix(username, ServiceAccountUsernameSeparator)
}
func matchesAResourceRule(ri *request.RequestInfo, rules []flowcontrol.ResourcePolicyRule) bool {
for _, rr := range rules {
if matchesResourcePolicyRule(ri, rr) {
return true
}
}
return false
}
func matchesResourcePolicyRule(ri *request.RequestInfo, policyRule flowcontrol.ResourcePolicyRule) bool {
if !matchPolicyRuleVerb(policyRule.Verbs, ri.Verb) {
return false
}
if !matchPolicyRuleResource(policyRule.Resources, ri.Resource, ri.Subresource) {
return false
}
if !matchPolicyRuleAPIGroup(policyRule.APIGroups, ri.APIGroup) {
return false
}
if len(ri.Namespace) == 0 {
return policyRule.ClusterScope
}
return containsString(ri.Namespace, policyRule.Namespaces, flowcontrol.NamespaceEvery)
}
func matchesANonResourceRule(ri *request.RequestInfo, rules []flowcontrol.NonResourcePolicyRule) bool {
for _, rr := range rules {
if matchesNonResourcePolicyRule(ri, rr) {
return true
}
}
return false
}
func matchesNonResourcePolicyRule(ri *request.RequestInfo, policyRule flowcontrol.NonResourcePolicyRule) bool {
if !matchPolicyRuleVerb(policyRule.Verbs, ri.Verb) {
return false
}
return matchPolicyRuleNonResourceURL(policyRule.NonResourceURLs, ri.Path)
}
func matchPolicyRuleVerb(policyRuleVerbs []string, requestVerb string) bool {
return containsString(requestVerb, policyRuleVerbs, flowcontrol.VerbAll)
}
func matchPolicyRuleNonResourceURL(policyRuleRequestURLs []string, requestPath string) bool {
for _, rulePath := range policyRuleRequestURLs {
if rulePath == flowcontrol.NonResourceAll || rulePath == requestPath {
return true
}
rulePrefix := strings.TrimSuffix(rulePath, "*")
if !strings.HasSuffix(rulePrefix, "/") {
rulePrefix = rulePrefix + "/"
}
if strings.HasPrefix(requestPath, rulePrefix) {
return true
}
}
return false
}
func matchPolicyRuleAPIGroup(policyRuleAPIGroups []string, requestAPIGroup string) bool {
return containsString(requestAPIGroup, policyRuleAPIGroups, flowcontrol.APIGroupAll)
}
func rsJoin(requestResource, requestSubresource string) string {
seekString := requestResource
if requestSubresource != "" {
seekString = requestResource + "/" + requestSubresource
}
return seekString
}
func matchPolicyRuleResource(policyRuleRequestResources []string, requestResource, requestSubresource string) bool {
return containsString(rsJoin(requestResource, requestSubresource), policyRuleRequestResources, flowcontrol.ResourceAll)
}
// containsString returns true if either `x` or `wildcard` is in
// `list`. The wildcard is not a pattern to match against `x`; rather
// the presence of the wildcard in the list is the caller's way of
// saying that all values of `x` should match the list. This function
// assumes that if `wildcard` is in `list` then it is the only member
// of the list, which is enforced by validation.
func containsString(x string, list []string, wildcard string) bool {
if len(list) == 1 && list[0] == wildcard {
return true
}
for _, y := range list {
if x == y {
return true
}
}
return false
}

View File

@ -0,0 +1,234 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flowcontrol
import (
"net/http"
"sync"
metainternalversion "k8s.io/apimachinery/pkg/apis/meta/internalversion"
"k8s.io/apimachinery/pkg/apis/meta/internalversion/scheme"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/klog/v2"
)
// readOnlyVerbs contains verbs for read-only requests.
var readOnlyVerbs = sets.NewString("get", "list", "watch", "proxy")
// watchIdentifier identifies group of watches that are similar.
// As described in the "Priority and Fairness" KEP, we consider
// watches similar if they have the same resourceType, namespace
// and name. We ignore selectors as they have to be evaluated
// when processing an even anyway.
//
// TODO: For now we only track the number of watches registered
// in our kube-apiserver. Eventually we should consider sharing
// this information with other kube-apiserver as described in the
// KEP, but this isn't part of the first version.
type watchIdentifier struct {
apiGroup string
resource string
namespace string
name string
}
// ForgetWatchFunc is a function that should be called to forget
// the previously registered watch from the watch tracker.
type ForgetWatchFunc func()
// WatchTracker is an interface that allows tracking the number
// of watches in the system for the purpose of estimating the
// cost of incoming mutating requests.
type WatchTracker interface {
// RegisterWatch reqisters a watch based on the provided http.Request
// in the tracker. It returns the function that should be called
// to forget the watcher once it is finished.
RegisterWatch(r *http.Request) ForgetWatchFunc
// GetInterestedWatchCount returns the number of watches that are
// potentially interested in a request with a given RequestInfo
// for the purpose of estimating cost of that request.
GetInterestedWatchCount(requestInfo *request.RequestInfo) int
}
// builtinIndexes represents of set of indexes registered in
// watchcache that are indexing watches and increase speed of
// their processing.
// We define the indexes as a map from a resource to the path
// to the field in the object on which the index is built.
type builtinIndexes map[string]string
func getBuiltinIndexes() builtinIndexes {
// The only existing indexes as of now are:
// - spec.nodeName for pods
// - metadata.Name for nodes, secrets and configmaps
// However, we can ignore the latter, because the requestInfo.Name
// is set for them (i.e. we already catch them correctly).
return map[string]string{
"pods": "spec.nodeName",
}
}
// watchTracker tracks the number of watches in the system for
// the purpose of estimating the cost of incoming mutating requests.
type watchTracker struct {
// indexes represents a set of registered indexes.
// It can't change after creation.
indexes builtinIndexes
lock sync.Mutex
watchCount map[watchIdentifier]int
}
func NewWatchTracker() WatchTracker {
return &watchTracker{
indexes: getBuiltinIndexes(),
watchCount: make(map[watchIdentifier]int),
}
}
const (
unsetValue = "<unset>"
)
func getIndexValue(r *http.Request, field string) string {
opts := metainternalversion.ListOptions{}
if err := scheme.ParameterCodec.DecodeParameters(r.URL.Query(), metav1.SchemeGroupVersion, &opts); err != nil {
klog.Warningf("Couldn't parse list options for %v: %v", r.URL.Query(), err)
return unsetValue
}
if opts.FieldSelector == nil {
return unsetValue
}
if value, ok := opts.FieldSelector.RequiresExactMatch(field); ok {
return value
}
return unsetValue
}
type indexValue struct {
resource string
value string
}
// RegisterWatch implements WatchTracker interface.
func (w *watchTracker) RegisterWatch(r *http.Request) ForgetWatchFunc {
requestInfo, ok := request.RequestInfoFrom(r.Context())
if !ok || requestInfo == nil || requestInfo.Verb != "watch" {
return nil
}
var index *indexValue
if indexField, ok := w.indexes[requestInfo.Resource]; ok {
index = &indexValue{
resource: requestInfo.Resource,
value: getIndexValue(r, indexField),
}
}
identifier := &watchIdentifier{
apiGroup: requestInfo.APIGroup,
resource: requestInfo.Resource,
namespace: requestInfo.Namespace,
name: requestInfo.Name,
}
w.lock.Lock()
defer w.lock.Unlock()
w.updateIndexLocked(identifier, index, 1)
return w.forgetWatch(identifier, index)
}
func (w *watchTracker) updateIndexLocked(identifier *watchIdentifier, index *indexValue, incr int) {
if index == nil {
w.watchCount[*identifier] += incr
} else {
// For resources with defined index, for a given watch event we are
// only processing the watchers that:
// (a) do not specify field selector for an index field
// (b) do specify field selector with the value equal to the value
// coming from the processed object
//
// TODO(wojtek-t): For the sake of making progress and initially
// simplifying the implementation, we approximate (b) for all values
// as the value for an empty string. The assumption we're making here
// is that the difference between the actual number of watchers that
// will be processed, i.e. (a)+(b) above and the one from our
// approximation i.e. (a)+[(b) for field value of ""] will be small.
// This seem to be true in almost all production clusters, which makes
// it a reasonable first step simplification to unblock progres on it.
if index.value == unsetValue || index.value == "" {
w.watchCount[*identifier] += incr
}
}
}
func (w *watchTracker) forgetWatch(identifier *watchIdentifier, index *indexValue) ForgetWatchFunc {
return func() {
w.lock.Lock()
defer w.lock.Unlock()
w.updateIndexLocked(identifier, index, -1)
if w.watchCount[*identifier] == 0 {
delete(w.watchCount, *identifier)
}
}
}
// GetInterestedWatchCount implements WatchTracker interface.
//
// TODO(wojtek-t): As of now, requestInfo for object creation (POST) doesn't
//
// contain the Name field set. Figure out if we can somehow get it for the
// more accurate cost estimation.
//
// TODO(wojtek-t): Figure out how to approach DELETECOLLECTION calls.
func (w *watchTracker) GetInterestedWatchCount(requestInfo *request.RequestInfo) int {
if requestInfo == nil || readOnlyVerbs.Has(requestInfo.Verb) {
return 0
}
result := 0
// The watches that we're interested in include:
// - watches for all objects of a resource type (no namespace and name specified)
// - watches for all objects of a resource type in the same namespace (no name specified)
// - watched interested in this particular object
identifier := &watchIdentifier{
apiGroup: requestInfo.APIGroup,
resource: requestInfo.Resource,
}
w.lock.Lock()
defer w.lock.Unlock()
result += w.watchCount[*identifier]
if requestInfo.Namespace != "" {
identifier.namespace = requestInfo.Namespace
result += w.watchCount[*identifier]
}
if requestInfo.Name != "" {
identifier.name = requestInfo.Name
result += w.watchCount[*identifier]
}
return result
}

19
vendor/k8s.io/apiserver/pkg/util/flushwriter/doc.go generated vendored Normal file
View File

@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package flushwriter implements a wrapper for a writer that flushes on every
// write if that writer implements the io.Flusher interface
package flushwriter // import "k8s.io/apiserver/pkg/util/flushwriter"

53
vendor/k8s.io/apiserver/pkg/util/flushwriter/writer.go generated vendored Normal file
View File

@ -0,0 +1,53 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flushwriter
import (
"io"
"net/http"
)
// Wrap wraps an io.Writer into a writer that flushes after every write if
// the writer implements the Flusher interface.
func Wrap(w io.Writer) io.Writer {
fw := &flushWriter{
writer: w,
}
if flusher, ok := w.(http.Flusher); ok {
fw.flusher = flusher
}
return fw
}
// flushWriter provides wrapper for responseWriter with HTTP streaming capabilities
type flushWriter struct {
flusher http.Flusher
writer io.Writer
}
// Write is a FlushWriter implementation of the io.Writer that sends any buffered
// data to the client.
func (fw *flushWriter) Write(p []byte) (n int, err error) {
n, err = fw.writer.Write(p)
if err != nil {
return
}
if fw.flusher != nil {
fw.flusher.Flush()
}
return
}

83
vendor/k8s.io/apiserver/pkg/util/openapi/enablement.go generated vendored Normal file
View File

@ -0,0 +1,83 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package openapi
import (
"strings"
genericfeatures "k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kube-openapi/pkg/common"
"k8s.io/kube-openapi/pkg/schemamutation"
"k8s.io/kube-openapi/pkg/validation/spec"
)
// enumTypeDescriptionHeader is the header of enum section in schema description.
const enumTypeDescriptionHeader = "Possible enum values:"
// GetOpenAPIDefinitionsWithoutDisabledFeatures wraps a GetOpenAPIDefinitions to revert
// any change to the schema that was made by disabled features.
func GetOpenAPIDefinitionsWithoutDisabledFeatures(GetOpenAPIDefinitions common.GetOpenAPIDefinitions) common.GetOpenAPIDefinitions {
return func(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition {
defs := GetOpenAPIDefinitions(ref)
restoreDefinitions(defs)
return defs
}
}
// restoreDefinitions restores any changes by disabled features from definition map.
func restoreDefinitions(defs map[string]common.OpenAPIDefinition) {
// revert changes from OpenAPIEnums
if !utilfeature.DefaultFeatureGate.Enabled(genericfeatures.OpenAPIEnums) {
for gvk, def := range defs {
orig := &def.Schema
if ret := pruneEnums(orig); ret != orig {
def.Schema = *ret
defs[gvk] = def
}
}
}
}
func pruneEnums(schema *spec.Schema) *spec.Schema {
walker := schemamutation.Walker{
SchemaCallback: func(schema *spec.Schema) *spec.Schema {
orig := schema
clone := func() {
if orig == schema { // if schema has not been mutated yet
schema = new(spec.Schema)
*schema = *orig // make a clone from orig to schema
}
}
if headerIndex := strings.Index(schema.Description, enumTypeDescriptionHeader); headerIndex != -1 {
// remove the enum section from description.
// note that the new lines before the header should be removed too,
// thus the slice range.
clone()
schema.Description = schema.Description[:headerIndex]
}
if len(schema.Enum) != 0 {
// remove the enum field
clone()
schema.Enum = nil
}
return schema
},
RefCallback: schemamutation.RefCallbackNoop,
}
return walker.WalkSchema(schema)
}

46
vendor/k8s.io/apiserver/pkg/util/openapi/proto.go generated vendored Normal file
View File

@ -0,0 +1,46 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package openapi
import (
"encoding/json"
openapi_v2 "github.com/google/gnostic/openapiv2"
"k8s.io/kube-openapi/pkg/util/proto"
"k8s.io/kube-openapi/pkg/validation/spec"
)
// ToProtoModels builds the proto formatted models from OpenAPI spec
func ToProtoModels(openAPISpec *spec.Swagger) (proto.Models, error) {
specBytes, err := json.MarshalIndent(openAPISpec, " ", " ")
if err != nil {
return nil, err
}
doc, err := openapi_v2.ParseDocument(specBytes)
if err != nil {
return nil, err
}
models, err := proto.NewOpenAPIData(doc)
if err != nil {
return nil, err
}
return models, nil
}

View File

@ -0,0 +1,107 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shufflesharding
import (
"fmt"
"math"
)
// MaxHashBits is the max bit length which can be used from hash value.
// If we use all bits of hash value, the critical(last) card shuffled by
// Dealer will be uneven to 2:3 (first half:second half) at most,
// in order to reduce this unevenness to 32:33, we set MaxHashBits to 60 here.
const MaxHashBits = 60
// RequiredEntropyBits makes a quick and slightly conservative estimate of the number
// of bits of hash value that are consumed in shuffle sharding a deck of the given size
// to a hand of the given size. The result is meaningful only if
// 1 <= handSize <= deckSize <= 1<<26.
func RequiredEntropyBits(deckSize, handSize int) int {
return int(math.Ceil(math.Log2(float64(deckSize)) * float64(handSize)))
}
// Dealer contains some necessary parameters and provides some methods for shuffle sharding.
// Dealer is thread-safe.
type Dealer struct {
deckSize int
handSize int
}
// NewDealer will create a Dealer with the given deckSize and handSize, will return error when
// deckSize or handSize is invalid as below.
// 1. deckSize or handSize is not positive
// 2. handSize is greater than deckSize
// 3. deckSize is impractically large (greater than 1<<26)
// 4. required entropy bits of deckSize and handSize is greater than MaxHashBits
func NewDealer(deckSize, handSize int) (*Dealer, error) {
if deckSize <= 0 || handSize <= 0 {
return nil, fmt.Errorf("deckSize %d or handSize %d is not positive", deckSize, handSize)
}
if handSize > deckSize {
return nil, fmt.Errorf("handSize %d is greater than deckSize %d", handSize, deckSize)
}
if deckSize > 1<<26 {
return nil, fmt.Errorf("deckSize %d is impractically large", deckSize)
}
if RequiredEntropyBits(deckSize, handSize) > MaxHashBits {
return nil, fmt.Errorf("required entropy bits of deckSize %d and handSize %d is greater than %d", deckSize, handSize, MaxHashBits)
}
return &Dealer{
deckSize: deckSize,
handSize: handSize,
}, nil
}
// Deal shuffles a card deck and deals a hand of cards, using the given hashValue as the source of entropy.
// The deck size and hand size are properties of the Dealer.
// This function synchronously makes sequential calls to pick, one for each dealt card.
// Each card is identified by an integer in the range [0, deckSize).
// For example, for deckSize=128 and handSize=4 this function might call pick(14); pick(73); pick(119); pick(26).
func (d *Dealer) Deal(hashValue uint64, pick func(int)) {
// 15 is the largest possible value of handSize
var remainders [15]int
for i := 0; i < d.handSize; i++ {
hashValueNext := hashValue / uint64(d.deckSize-i)
remainders[i] = int(hashValue - uint64(d.deckSize-i)*hashValueNext)
hashValue = hashValueNext
}
for i := 0; i < d.handSize; i++ {
card := remainders[i]
for j := i; j > 0; j-- {
if card >= remainders[j-1] {
card++
}
}
pick(card)
}
}
// DealIntoHand shuffles and deals according to the Dealer's parameters,
// using the given hashValue as the source of entropy and then
// returns the dealt cards as a slice of `int`.
// If `hand` has the correct length as Dealer's handSize, it will be used as-is and no allocations will be made.
// If `hand` is nil or too small, it will be extended (performing an allocation).
// If `hand` is too large, a sub-slice will be returned.
func (d *Dealer) DealIntoHand(hashValue uint64, hand []int) []int {
h := hand[:0]
d.Deal(hashValue, func(card int) { h = append(h, card) })
return h
}

350
vendor/k8s.io/apiserver/pkg/util/wsstream/conn.go generated vendored Normal file
View File

@ -0,0 +1,350 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package wsstream
import (
"encoding/base64"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
"golang.org/x/net/websocket"
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/util/runtime"
)
// The Websocket subprotocol "channel.k8s.io" prepends each binary message with a byte indicating
// the channel number (zero indexed) the message was sent on. Messages in both directions should
// prefix their messages with this channel byte. When used for remote execution, the channel numbers
// are by convention defined to match the POSIX file-descriptors assigned to STDIN, STDOUT, and STDERR
// (0, 1, and 2). No other conversion is performed on the raw subprotocol - writes are sent as they
// are received by the server.
//
// Example client session:
//
// CONNECT http://server.com with subprotocol "channel.k8s.io"
// WRITE []byte{0, 102, 111, 111, 10} # send "foo\n" on channel 0 (STDIN)
// READ []byte{1, 10} # receive "\n" on channel 1 (STDOUT)
// CLOSE
const ChannelWebSocketProtocol = "channel.k8s.io"
// The Websocket subprotocol "base64.channel.k8s.io" base64 encodes each message with a character
// indicating the channel number (zero indexed) the message was sent on. Messages in both directions
// should prefix their messages with this channel char. When used for remote execution, the channel
// numbers are by convention defined to match the POSIX file-descriptors assigned to STDIN, STDOUT,
// and STDERR ('0', '1', and '2'). The data received on the server is base64 decoded (and must be
// be valid) and data written by the server to the client is base64 encoded.
//
// Example client session:
//
// CONNECT http://server.com with subprotocol "base64.channel.k8s.io"
// WRITE []byte{48, 90, 109, 57, 118, 67, 103, 111, 61} # send "foo\n" (base64: "Zm9vCgo=") on channel '0' (STDIN)
// READ []byte{49, 67, 103, 61, 61} # receive "\n" (base64: "Cg==") on channel '1' (STDOUT)
// CLOSE
const Base64ChannelWebSocketProtocol = "base64.channel.k8s.io"
type codecType int
const (
rawCodec codecType = iota
base64Codec
)
type ChannelType int
const (
IgnoreChannel ChannelType = iota
ReadChannel
WriteChannel
ReadWriteChannel
)
var (
// connectionUpgradeRegex matches any Connection header value that includes upgrade
connectionUpgradeRegex = regexp.MustCompile("(^|.*,\\s*)upgrade($|\\s*,)")
)
// IsWebSocketRequest returns true if the incoming request contains connection upgrade headers
// for WebSockets.
func IsWebSocketRequest(req *http.Request) bool {
if !strings.EqualFold(req.Header.Get("Upgrade"), "websocket") {
return false
}
return connectionUpgradeRegex.MatchString(strings.ToLower(req.Header.Get("Connection")))
}
// IgnoreReceives reads from a WebSocket until it is closed, then returns. If timeout is set, the
// read and write deadlines are pushed every time a new message is received.
func IgnoreReceives(ws *websocket.Conn, timeout time.Duration) {
defer runtime.HandleCrash()
var data []byte
for {
resetTimeout(ws, timeout)
if err := websocket.Message.Receive(ws, &data); err != nil {
return
}
}
}
// handshake ensures the provided user protocol matches one of the allowed protocols. It returns
// no error if no protocol is specified.
func handshake(config *websocket.Config, req *http.Request, allowed []string) error {
protocols := config.Protocol
if len(protocols) == 0 {
protocols = []string{""}
}
for _, protocol := range protocols {
for _, allow := range allowed {
if allow == protocol {
config.Protocol = []string{protocol}
return nil
}
}
}
return fmt.Errorf("requested protocol(s) are not supported: %v; supports %v", config.Protocol, allowed)
}
// ChannelProtocolConfig describes a websocket subprotocol with channels.
type ChannelProtocolConfig struct {
Binary bool
Channels []ChannelType
}
// NewDefaultChannelProtocols returns a channel protocol map with the
// subprotocols "", "channel.k8s.io", "base64.channel.k8s.io" and the given
// channels.
func NewDefaultChannelProtocols(channels []ChannelType) map[string]ChannelProtocolConfig {
return map[string]ChannelProtocolConfig{
"": {Binary: true, Channels: channels},
ChannelWebSocketProtocol: {Binary: true, Channels: channels},
Base64ChannelWebSocketProtocol: {Binary: false, Channels: channels},
}
}
// Conn supports sending multiple binary channels over a websocket connection.
type Conn struct {
protocols map[string]ChannelProtocolConfig
selectedProtocol string
channels []*websocketChannel
codec codecType
ready chan struct{}
ws *websocket.Conn
timeout time.Duration
}
// NewConn creates a WebSocket connection that supports a set of channels. Channels begin each
// web socket message with a single byte indicating the channel number (0-N). 255 is reserved for
// future use. The channel types for each channel are passed as an array, supporting the different
// duplex modes. Read and Write refer to whether the channel can be used as a Reader or Writer.
//
// The protocols parameter maps subprotocol names to ChannelProtocols. The empty string subprotocol
// name is used if websocket.Config.Protocol is empty.
func NewConn(protocols map[string]ChannelProtocolConfig) *Conn {
return &Conn{
ready: make(chan struct{}),
protocols: protocols,
}
}
// SetIdleTimeout sets the interval for both reads and writes before timeout. If not specified,
// there is no timeout on the connection.
func (conn *Conn) SetIdleTimeout(duration time.Duration) {
conn.timeout = duration
}
// Open the connection and create channels for reading and writing. It returns
// the selected subprotocol, a slice of channels and an error.
func (conn *Conn) Open(w http.ResponseWriter, req *http.Request) (string, []io.ReadWriteCloser, error) {
go func() {
defer runtime.HandleCrash()
defer conn.Close()
websocket.Server{Handshake: conn.handshake, Handler: conn.handle}.ServeHTTP(w, req)
}()
<-conn.ready
rwc := make([]io.ReadWriteCloser, len(conn.channels))
for i := range conn.channels {
rwc[i] = conn.channels[i]
}
return conn.selectedProtocol, rwc, nil
}
func (conn *Conn) initialize(ws *websocket.Conn) {
negotiated := ws.Config().Protocol
conn.selectedProtocol = negotiated[0]
p := conn.protocols[conn.selectedProtocol]
if p.Binary {
conn.codec = rawCodec
} else {
conn.codec = base64Codec
}
conn.ws = ws
conn.channels = make([]*websocketChannel, len(p.Channels))
for i, t := range p.Channels {
switch t {
case ReadChannel:
conn.channels[i] = newWebsocketChannel(conn, byte(i), true, false)
case WriteChannel:
conn.channels[i] = newWebsocketChannel(conn, byte(i), false, true)
case ReadWriteChannel:
conn.channels[i] = newWebsocketChannel(conn, byte(i), true, true)
case IgnoreChannel:
conn.channels[i] = newWebsocketChannel(conn, byte(i), false, false)
}
}
close(conn.ready)
}
func (conn *Conn) handshake(config *websocket.Config, req *http.Request) error {
supportedProtocols := make([]string, 0, len(conn.protocols))
for p := range conn.protocols {
supportedProtocols = append(supportedProtocols, p)
}
return handshake(config, req, supportedProtocols)
}
func (conn *Conn) resetTimeout() {
if conn.timeout > 0 {
conn.ws.SetDeadline(time.Now().Add(conn.timeout))
}
}
// Close is only valid after Open has been called
func (conn *Conn) Close() error {
<-conn.ready
for _, s := range conn.channels {
s.Close()
}
conn.ws.Close()
return nil
}
// handle implements a websocket handler.
func (conn *Conn) handle(ws *websocket.Conn) {
defer conn.Close()
conn.initialize(ws)
for {
conn.resetTimeout()
var data []byte
if err := websocket.Message.Receive(ws, &data); err != nil {
if err != io.EOF {
klog.Errorf("Error on socket receive: %v", err)
}
break
}
if len(data) == 0 {
continue
}
channel := data[0]
if conn.codec == base64Codec {
channel = channel - '0'
}
data = data[1:]
if int(channel) >= len(conn.channels) {
klog.V(6).Infof("Frame is targeted for a reader %d that is not valid, possible protocol error", channel)
continue
}
if _, err := conn.channels[channel].DataFromSocket(data); err != nil {
klog.Errorf("Unable to write frame to %d: %v\n%s", channel, err, string(data))
continue
}
}
}
// write multiplexes the specified channel onto the websocket
func (conn *Conn) write(num byte, data []byte) (int, error) {
conn.resetTimeout()
switch conn.codec {
case rawCodec:
frame := make([]byte, len(data)+1)
frame[0] = num
copy(frame[1:], data)
if err := websocket.Message.Send(conn.ws, frame); err != nil {
return 0, err
}
case base64Codec:
frame := string('0'+num) + base64.StdEncoding.EncodeToString(data)
if err := websocket.Message.Send(conn.ws, frame); err != nil {
return 0, err
}
}
return len(data), nil
}
// websocketChannel represents a channel in a connection
type websocketChannel struct {
conn *Conn
num byte
r io.Reader
w io.WriteCloser
read, write bool
}
// newWebsocketChannel creates a pipe for writing to a websocket. Do not write to this pipe
// prior to the connection being opened. It may be no, half, or full duplex depending on
// read and write.
func newWebsocketChannel(conn *Conn, num byte, read, write bool) *websocketChannel {
r, w := io.Pipe()
return &websocketChannel{conn, num, r, w, read, write}
}
func (p *websocketChannel) Write(data []byte) (int, error) {
if !p.write {
return len(data), nil
}
return p.conn.write(p.num, data)
}
// DataFromSocket is invoked by the connection receiver to move data from the connection
// into a specific channel.
func (p *websocketChannel) DataFromSocket(data []byte) (int, error) {
if !p.read {
return len(data), nil
}
switch p.conn.codec {
case rawCodec:
return p.w.Write(data)
case base64Codec:
dst := make([]byte, len(data))
n, err := base64.StdEncoding.Decode(dst, data)
if err != nil {
return 0, err
}
return p.w.Write(dst[:n])
}
return 0, nil
}
func (p *websocketChannel) Read(data []byte) (int, error) {
if !p.read {
return 0, io.EOF
}
return p.r.Read(data)
}
func (p *websocketChannel) Close() error {
return p.w.Close()
}

21
vendor/k8s.io/apiserver/pkg/util/wsstream/doc.go generated vendored Normal file
View File

@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package wsstream contains utilities for streaming content over WebSockets.
// The Conn type allows callers to multiplex multiple read/write channels over
// a single websocket. The Reader type allows an io.Reader to be copied over
// a websocket channel as binary content.
package wsstream // import "k8s.io/apiserver/pkg/util/wsstream"

177
vendor/k8s.io/apiserver/pkg/util/wsstream/stream.go generated vendored Normal file
View File

@ -0,0 +1,177 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package wsstream
import (
"encoding/base64"
"io"
"net/http"
"sync"
"time"
"golang.org/x/net/websocket"
"k8s.io/apimachinery/pkg/util/runtime"
)
// The WebSocket subprotocol "binary.k8s.io" will only send messages to the
// client and ignore messages sent to the server. The received messages are
// the exact bytes written to the stream. Zero byte messages are possible.
const binaryWebSocketProtocol = "binary.k8s.io"
// The WebSocket subprotocol "base64.binary.k8s.io" will only send messages to the
// client and ignore messages sent to the server. The received messages are
// a base64 version of the bytes written to the stream. Zero byte messages are
// possible.
const base64BinaryWebSocketProtocol = "base64.binary.k8s.io"
// ReaderProtocolConfig describes a websocket subprotocol with one stream.
type ReaderProtocolConfig struct {
Binary bool
}
// NewDefaultReaderProtocols returns a stream protocol map with the
// subprotocols "", "channel.k8s.io", "base64.channel.k8s.io".
func NewDefaultReaderProtocols() map[string]ReaderProtocolConfig {
return map[string]ReaderProtocolConfig{
"": {Binary: true},
binaryWebSocketProtocol: {Binary: true},
base64BinaryWebSocketProtocol: {Binary: false},
}
}
// Reader supports returning an arbitrary byte stream over a websocket channel.
type Reader struct {
err chan error
r io.Reader
ping bool
timeout time.Duration
protocols map[string]ReaderProtocolConfig
selectedProtocol string
handleCrash func(additionalHandlers ...func(interface{})) // overridable for testing
}
// NewReader creates a WebSocket pipe that will copy the contents of r to a provided
// WebSocket connection. If ping is true, a zero length message will be sent to the client
// before the stream begins reading.
//
// The protocols parameter maps subprotocol names to StreamProtocols. The empty string
// subprotocol name is used if websocket.Config.Protocol is empty.
func NewReader(r io.Reader, ping bool, protocols map[string]ReaderProtocolConfig) *Reader {
return &Reader{
r: r,
err: make(chan error),
ping: ping,
protocols: protocols,
handleCrash: runtime.HandleCrash,
}
}
// SetIdleTimeout sets the interval for both reads and writes before timeout. If not specified,
// there is no timeout on the reader.
func (r *Reader) SetIdleTimeout(duration time.Duration) {
r.timeout = duration
}
func (r *Reader) handshake(config *websocket.Config, req *http.Request) error {
supportedProtocols := make([]string, 0, len(r.protocols))
for p := range r.protocols {
supportedProtocols = append(supportedProtocols, p)
}
return handshake(config, req, supportedProtocols)
}
// Copy the reader to the response. The created WebSocket is closed after this
// method completes.
func (r *Reader) Copy(w http.ResponseWriter, req *http.Request) error {
go func() {
defer r.handleCrash()
websocket.Server{Handshake: r.handshake, Handler: r.handle}.ServeHTTP(w, req)
}()
return <-r.err
}
// handle implements a WebSocket handler.
func (r *Reader) handle(ws *websocket.Conn) {
// Close the connection when the client requests it, or when we finish streaming, whichever happens first
closeConnOnce := &sync.Once{}
closeConn := func() {
closeConnOnce.Do(func() {
ws.Close()
})
}
negotiated := ws.Config().Protocol
r.selectedProtocol = negotiated[0]
defer close(r.err)
defer closeConn()
go func() {
defer runtime.HandleCrash()
// This blocks until the connection is closed.
// Client should not send anything.
IgnoreReceives(ws, r.timeout)
// Once the client closes, we should also close
closeConn()
}()
r.err <- messageCopy(ws, r.r, !r.protocols[r.selectedProtocol].Binary, r.ping, r.timeout)
}
func resetTimeout(ws *websocket.Conn, timeout time.Duration) {
if timeout > 0 {
ws.SetDeadline(time.Now().Add(timeout))
}
}
func messageCopy(ws *websocket.Conn, r io.Reader, base64Encode, ping bool, timeout time.Duration) error {
buf := make([]byte, 2048)
if ping {
resetTimeout(ws, timeout)
if base64Encode {
if err := websocket.Message.Send(ws, ""); err != nil {
return err
}
} else {
if err := websocket.Message.Send(ws, []byte{}); err != nil {
return err
}
}
}
for {
resetTimeout(ws, timeout)
n, err := r.Read(buf)
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if n > 0 {
if base64Encode {
if err := websocket.Message.Send(ws, base64.StdEncoding.EncodeToString(buf[:n])); err != nil {
return err
}
} else {
if err := websocket.Message.Send(ws, buf[:n]); err != nil {
return err
}
}
}
}
}