rebase: bump the k8s-dependencies group in /e2e with 3 updates

Bumps the k8s-dependencies group in /e2e with 3 updates: [k8s.io/apimachinery](https://github.com/kubernetes/apimachinery), [k8s.io/cloud-provider](https://github.com/kubernetes/cloud-provider) and [k8s.io/pod-security-admission](https://github.com/kubernetes/pod-security-admission).


Updates `k8s.io/apimachinery` from 0.32.3 to 0.33.0
- [Commits](https://github.com/kubernetes/apimachinery/compare/v0.32.3...v0.33.0)

Updates `k8s.io/cloud-provider` from 0.32.3 to 0.33.0
- [Commits](https://github.com/kubernetes/cloud-provider/compare/v0.32.3...v0.33.0)

Updates `k8s.io/pod-security-admission` from 0.32.3 to 0.33.0
- [Commits](https://github.com/kubernetes/pod-security-admission/compare/v0.32.3...v0.33.0)

---
updated-dependencies:
- dependency-name: k8s.io/apimachinery
  dependency-version: 0.33.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: k8s-dependencies
- dependency-name: k8s.io/cloud-provider
  dependency-version: 0.33.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: k8s-dependencies
- dependency-name: k8s.io/pod-security-admission
  dependency-version: 0.33.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: k8s-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2025-05-06 11:20:01 +00:00
committed by mergify[bot]
parent d52dc2c4ba
commit dd77e72800
359 changed files with 11145 additions and 18557 deletions

View File

@ -16,4 +16,4 @@ limitations under the License.
// +k8s:deepcopy-gen=package
package webhookadmission // import "k8s.io/apiserver/pkg/admission/plugin/webhook/config/apis/webhookadmission"
package webhookadmission

View File

@ -20,4 +20,4 @@ limitations under the License.
// +groupName=apiserver.config.k8s.io
// Package v1 is the v1 version of the API.
package v1 // import "k8s.io/apiserver/pkg/admission/plugin/webhook/config/apis/webhookadmission/v1"
package v1

View File

@ -20,4 +20,4 @@ limitations under the License.
// +groupName=apiserver.config.k8s.io
// Package v1alpha1 is the v1alpha1 version of the API.
package v1alpha1 // import "k8s.io/apiserver/pkg/admission/plugin/webhook/config/apis/webhookadmission/v1alpha1"
package v1alpha1

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package errors contains utilities for admission webhook specific errors
package errors // import "k8s.io/apiserver/pkg/admission/plugin/webhook/errors"
package errors

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package mutating makes calls to mutating webhooks during the admission
// process.
package mutating // import "k8s.io/apiserver/pkg/admission/plugin/webhook/mutating"
package mutating

View File

@ -17,4 +17,4 @@ limitations under the License.
// Package namespace defines the utilities that are used by the webhook
// plugin to decide if a webhook should be applied to an object based on its
// namespace.
package namespace // import "k8s.io/apiserver/pkg/admission/plugin/webhook/predicates/namespace"
package namespace

View File

@ -17,4 +17,4 @@ limitations under the License.
// Package object defines the utilities that are used by the webhook plugin to
// decide if a webhook should run, as long as either the old object or the new
// object has labels matching the webhook config's objectSelector.
package object // import "k8s.io/apiserver/pkg/admission/plugin/webhook/predicates/object"
package object

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package request creates admissionReview request based on admission attributes.
package request // import "k8s.io/apiserver/pkg/admission/plugin/webhook/request"
package request

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package validating makes calls to validating (i.e., non-mutating) webhooks
// during the admission process.
package validating // import "k8s.io/apiserver/pkg/admission/plugin/webhook/validating"
package validating

View File

@ -16,4 +16,4 @@ limitations under the License.
// +groupName=apidiscovery.k8s.io
package v2 // import "k8s.io/apiserver/pkg/apis/apidiscovery/v2"
package v2

View File

@ -18,4 +18,4 @@ limitations under the License.
// +groupName=apiserver.k8s.io
// Package apiserver is the internal version of the API.
package apiserver // import "k8s.io/apiserver/pkg/apis/apiserver"
package apiserver

View File

@ -20,4 +20,4 @@ limitations under the License.
// +groupName=apiserver.config.k8s.io
// Package v1 is the v1 version of the API.
package v1 // import "k8s.io/apiserver/pkg/apis/apiserver/v1"
package v1

View File

@ -21,4 +21,4 @@ limitations under the License.
// +groupName=apiserver.config.k8s.io
// Package v1alpha1 is the v1alpha1 version of the API.
package v1alpha1 // import "k8s.io/apiserver/pkg/apis/apiserver/v1alpha1"
package v1alpha1

View File

@ -352,7 +352,9 @@ type ClaimMappings struct {
// If username.expression uses 'claims.email', then 'claims.email_verified' must be used in
// username.expression or extra[*].valueExpression or claimValidationRules[*].expression.
// An example claim validation rule expression that matches the validation automatically
// applied when username.claim is set to 'email' is 'claims.?email_verified.orValue(true)'.
// applied when username.claim is set to 'email' is 'claims.?email_verified.orValue(true) == true'. By explicitly comparing
// the value to true, we let type-checking see that the result will be a boolean, and we make sure a non-boolean email_verified
// claim will be caught at runtime.
//
// In the flag based approach, the --oidc-username-claim and --oidc-username-prefix are optional. If --oidc-username-claim is not set,
// the default value is "sub". For the authentication config, there is no defaulting for claim or prefix. The claim and prefix must be set explicitly.

View File

@ -20,4 +20,4 @@ limitations under the License.
// +groupName=apiserver.k8s.io
// Package v1beta1 is the v1beta1 version of the API.
package v1beta1 // import "k8s.io/apiserver/pkg/apis/apiserver/v1beta1"
package v1beta1

View File

@ -323,7 +323,9 @@ type ClaimMappings struct {
// If username.expression uses 'claims.email', then 'claims.email_verified' must be used in
// username.expression or extra[*].valueExpression or claimValidationRules[*].expression.
// An example claim validation rule expression that matches the validation automatically
// applied when username.claim is set to 'email' is 'claims.?email_verified.orValue(true)'.
// applied when username.claim is set to 'email' is 'claims.?email_verified.orValue(true) == true'. By explicitly comparing
// the value to true, we let type-checking see that the result will be a boolean, and we make sure a non-boolean email_verified
// claim will be caught at runtime.
//
// In the flag based approach, the --oidc-username-claim and --oidc-username-prefix are optional. If --oidc-username-claim is not set,
// the default value is "sub". For the authentication config, there is no defaulting for claim or prefix. The claim and prefix must be set explicitly.

View File

@ -17,4 +17,4 @@ limitations under the License.
// +k8s:deepcopy-gen=package
// +groupName=audit.k8s.io
package audit // import "k8s.io/apiserver/pkg/apis/audit"
package audit

View File

@ -22,4 +22,4 @@ limitations under the License.
// +groupName=audit.k8s.io
package v1 // import "k8s.io/apiserver/pkg/apis/audit/v1"
package v1

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package x509 provides a request authenticator that validates and
// extracts user information from client certificates
package x509 // import "k8s.io/apiserver/pkg/authentication/request/x509"
package x509

View File

@ -21,15 +21,19 @@ import (
"crypto/x509"
"crypto/x509/pkix"
"encoding/hex"
"errors"
"fmt"
"net/http"
"strings"
"time"
asn1util "k8s.io/apimachinery/pkg/apis/asn1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
@ -281,9 +285,14 @@ var CommonNameUserConversion = UserConversionFunc(func(chain []*x509.Certificate
fp := sha256.Sum256(chain[0].Raw)
id := "X509SHA256=" + hex.EncodeToString(fp[:])
uid, err := parseUIDFromCert(chain[0])
if err != nil {
return nil, false, err
}
return &authenticator.Response{
User: &user.DefaultInfo{
Name: chain[0].Subject.CommonName,
UID: uid,
Groups: chain[0].Subject.Organization,
Extra: map[string][]string{
user.CredentialIDKey: {id},
@ -291,3 +300,33 @@ var CommonNameUserConversion = UserConversionFunc(func(chain []*x509.Certificate
},
}, true, nil
})
// uidOID is the attribute type that parseUIDFromCert matches against the
// certificate subject names to find UID entries.
var uidOID = asn1util.X509UID()

// parseUIDFromCert extracts the user UID from the subject of cert.
//
// It returns an empty UID and a nil error when the
// AllowParsingUserUIDFromCertAuth feature gate is disabled or when the subject
// carries no UID attribute. It returns an error when a UID attribute value is
// not a string, when more than one UID attribute is present, or when the single
// UID is the empty string.
func parseUIDFromCert(cert *x509.Certificate) (string, error) {
	if !utilfeature.DefaultFeatureGate.Enabled(features.AllowParsingUserUIDFromCertAuth) {
		return "", nil
	}

	// Collect every UID attribute so that duplicates can be reported together.
	found := []string{}
	for _, attr := range cert.Subject.Names {
		if attr.Type.Equal(uidOID) {
			value, isString := attr.Value.(string)
			if !isString {
				return "", fmt.Errorf("unable to parse UID into a string")
			}
			found = append(found, value)
		}
	}

	switch {
	case len(found) == 0:
		// A certificate without a UID is valid; the UID is simply unset.
		return "", nil
	case len(found) != 1:
		return "", fmt.Errorf("expected 1 UID, but found multiple: %v", found)
	case found[0] == "":
		return "", errors.New("UID cannot be an empty string")
	default:
		return found[0], nil
	}
}

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package user contains utilities for dealing with simple user exchange in the auth
// packages. The user.Info interface defines an interface for exchanging that info.
package user // import "k8s.io/apiserver/pkg/authentication/user"
package user

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package path contains an authorizer that allows certain paths and path prefixes.
package path // import "k8s.io/apiserver/pkg/authorization/path"
package path

View File

@ -32,9 +32,9 @@ import (
celconfig "k8s.io/apiserver/pkg/apis/cel"
"k8s.io/apiserver/pkg/cel/library"
genericfeatures "k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/util/compatibility"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
utilversion "k8s.io/component-base/version"
basecompatibility "k8s.io/component-base/compatibility"
)
// DefaultCompatibilityVersion returns a default compatibility version for use with EnvSet
@ -50,9 +50,9 @@ import (
// A default version number equal to the current Kubernetes major.minor version
// indicates fast forward CEL features that can be used when rollback is no longer needed.
func DefaultCompatibilityVersion() *version.Version {
effectiveVer := featuregate.DefaultComponentGlobalsRegistry.EffectiveVersionFor(featuregate.DefaultKubeComponent)
effectiveVer := compatibility.DefaultComponentGlobalsRegistry.EffectiveVersionFor(basecompatibility.DefaultKubeComponent)
if effectiveVer == nil {
effectiveVer = utilversion.DefaultKubeEffectiveVersion()
effectiveVer = compatibility.DefaultBuildEffectiveVersion()
}
return effectiveVer.MinCompatibilityVersion()
}
@ -173,7 +173,14 @@ var baseOptsWithoutStrictCost = []VersionedOptions{
{
IntroducedVersion: version.MajorMinor(1, 32),
EnvOptions: []cel.EnvOption{
UnversionedLib(ext.TwoVarComprehensions),
ext.TwoVarComprehensions(),
},
},
// Semver
{
IntroducedVersion: version.MajorMinor(1, 33),
EnvOptions: []cel.EnvOption{
library.SemverLib(library.SemverVersion(1)),
},
},
}

View File

@ -160,9 +160,7 @@ var cidrLibraryDecls = map[string][]cel.FunctionOpt{
}
func (*cidrs) CompileOptions() []cel.EnvOption {
options := []cel.EnvOption{cel.Types(apiservercel.CIDRType),
cel.Variable(apiservercel.CIDRType.TypeName(), types.NewTypeTypeWithParam(apiservercel.CIDRType)),
}
options := []cel.EnvOption{cel.Types(apiservercel.CIDRType)}
for name, overloads := range cidrLibraryDecls {
options = append(options, cel.Function(name, overloads...))
}
@ -231,8 +229,7 @@ func cidrContainsCIDR(arg ref.Val, other ref.Val) ref.Val {
return types.MaybeNoSuchOverloadErr(other)
}
equalMasked := cidr.Prefix.Masked() == netip.PrefixFrom(containsCIDR.Prefix.Addr(), cidr.Prefix.Bits())
return types.Bool(equalMasked && cidr.Prefix.Bits() <= containsCIDR.Prefix.Bits())
return types.Bool(cidr.Overlaps(containsCIDR.Prefix) && cidr.Prefix.Bits() <= containsCIDR.Prefix.Bits())
}
func prefixLength(arg ref.Val) ref.Val {

View File

@ -18,13 +18,14 @@ package library
import (
"fmt"
"math"
"github.com/google/cel-go/checker"
"github.com/google/cel-go/common"
"github.com/google/cel-go/common/ast"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/common/types/traits"
"math"
"k8s.io/apiserver/pkg/cel"
)
@ -202,7 +203,7 @@ func (l *CostEstimator) CallCost(function, overloadId string, args []ref.Val, re
return &cost
}
case "quantity", "isQuantity":
case "quantity", "isQuantity", "semver", "isSemver":
if len(args) >= 1 {
cost := uint64(math.Ceil(float64(actualSize(args[0])) * common.StringTraversalCostFactor))
return &cost
@ -236,7 +237,7 @@ func (l *CostEstimator) CallCost(function, overloadId string, args []ref.Val, re
// Simply dictionary lookup
cost := uint64(1)
return &cost
case "sign", "asInteger", "isInteger", "asApproximateFloat", "isGreaterThan", "isLessThan", "compareTo", "add", "sub":
case "sign", "asInteger", "isInteger", "asApproximateFloat", "isGreaterThan", "isLessThan", "compareTo", "add", "sub", "major", "minor", "patch":
cost := uint64(1)
return &cost
case "getScheme", "getHostname", "getHost", "getPort", "getEscapedPath", "getQuery":
@ -486,7 +487,7 @@ func (l *CostEstimator) EstimateCallCost(function, overloadId string, target *ch
return &checker.CallEstimate{CostEstimate: ipCompCost}
}
case "quantity", "isQuantity":
case "quantity", "isQuantity", "semver", "isSemver":
if target != nil {
sz := l.sizeEstimate(args[0])
return &checker.CallEstimate{CostEstimate: sz.MultiplyByCostFactor(common.StringTraversalCostFactor)}
@ -498,7 +499,7 @@ func (l *CostEstimator) EstimateCallCost(function, overloadId string, target *ch
}
case "format.named":
return &checker.CallEstimate{CostEstimate: checker.CostEstimate{Min: 1, Max: 1}}
case "sign", "asInteger", "isInteger", "asApproximateFloat", "isGreaterThan", "isLessThan", "compareTo", "add", "sub":
case "sign", "asInteger", "isInteger", "asApproximateFloat", "isGreaterThan", "isLessThan", "compareTo", "add", "sub", "major", "minor", "patch":
return &checker.CallEstimate{CostEstimate: checker.CostEstimate{Min: 1, Max: 1}}
case "getScheme", "getHostname", "getHost", "getPort", "getEscapedPath", "getQuery":
// url accessors

View File

@ -20,7 +20,6 @@ import (
"fmt"
"net/url"
"github.com/asaskevich/govalidator"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/decls"
"github.com/google/cel-go/common/types"
@ -32,6 +31,13 @@ import (
"k8s.io/kube-openapi/pkg/validation/strfmt"
)
var (
// base64Length is an estimate of the size of the base64 regex from github.com/asaskevich/govalidator.
base64Length = 84
// urlLength is an estimate of the size of the url regex from github.com/asaskevich/govalidator.
urlLength = 1103
)
// Format provides a CEL library exposing common named Kubernetes string
// validations. Can be used in CRD ValidationRules messageExpression.
//
@ -193,7 +199,7 @@ var ConstantFormats = map[string]apiservercel.Format{
},
// Use govalidator url regex to estimate, since ParseRequestURI
// doesnt use regex
MaxRegexSize: len(govalidator.URL),
MaxRegexSize: urlLength,
},
"uuid": {
Name: "uuid",
@ -213,7 +219,7 @@ var ConstantFormats = map[string]apiservercel.Format{
}
return nil
},
MaxRegexSize: len(govalidator.Base64),
MaxRegexSize: base64Length,
},
"date": {
Name: "date",

View File

@ -187,9 +187,7 @@ var ipLibraryDecls = map[string][]cel.FunctionOpt{
}
func (*ip) CompileOptions() []cel.EnvOption {
options := []cel.EnvOption{cel.Types(apiservercel.IPType),
cel.Variable(apiservercel.IPType.TypeName(), types.NewTypeTypeWithParam(apiservercel.IPType)),
}
options := []cel.EnvOption{cel.Types(apiservercel.IPType)}
for name, overloads := range ipLibraryDecls {
options = append(options, cel.Function(name, overloads...))
}

View File

@ -17,6 +17,10 @@ limitations under the License.
package library
import (
"errors"
"math"
"strings"
"github.com/blang/semver/v4"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
@ -31,8 +35,10 @@ import (
//
// Converts a string to a semantic version or results in an error if the string is not a valid semantic version. Refer
// to semver.org documentation for information on accepted patterns.
//
// An optional "normalize" argument can be passed to enable normalization. Normalization removes any "v" prefix, adds a
// 0 minor and patch numbers to versions with only major or major.minor components specified, and removes any leading 0s.
// semver(<string>) <Semver>
// semver(<string>, <bool>) <Semver>
//
// Examples:
//
@ -41,19 +47,28 @@ import (
// semver('200K') // error
// semver('Three') // error
// semver('Mi') // error
// semver('v1.0.0', true) // Applies normalization to remove the leading "v". Returns a Semver of "1.0.0".
// semver('1.0', true) // Applies normalization to add the missing patch version. Returns a Semver of "1.0.0"
// semver('01.01.01', true) // Applies normalization to remove leading zeros. Returns a Semver of "1.1.1"
//
// isSemver
//
// Returns true if a string is a valid Semver. isSemver returns true if and
// only if semver does not result in error.
// An optional "normalize" argument can be passed to enable normalization. Normalization removes any "v" prefix, adds a
// 0 minor and patch numbers to versions with only major or major.minor components specified, and removes any leading 0s.
//
// isSemver( <string>) <bool>
// isSemver( <string>, <bool>) <bool>
//
// Examples:
//
// isSemver('1.0.0') // returns true
// isSemver('v1.0') // returns true (tolerant parsing)
// isSemver('hello') // returns false
// isSemver('v1.0') // returns false (leading "v" is not allowed unless normalization is enabled)
// isSemver('v1.0', true) // Applies normalization to remove leading "v". returns true
// isSemver('1.0', true) // Applies normalization to add the missing patch version. Returns true
// isSemver('01.01.01', true) // Applies normalization to remove leading zeros. Returns true
//
// Conversion to Scalars:
//
@ -84,13 +99,29 @@ import (
// semver("1.2.3").compareTo(semver("2.0.0")) // returns -1
// semver("1.2.3").compareTo(semver("0.1.2")) // returns 1
func SemverLib() cel.EnvOption {
func SemverLib(options ...SemverOption) cel.EnvOption {
semverLib := &semverLibType{}
for _, o := range options {
semverLib = o(semverLib)
}
return cel.Lib(semverLib)
}
var semverLib = &semverLibType{}
var semverLib = &semverLibType{version: math.MaxUint32} // include all versions
type semverLibType struct{}
type semverLibType struct {
version uint32
}
// SemverOption is a functional option for configuring the semver library.
type SemverOption func(*semverLibType) *semverLibType
// SemverVersion returns a SemverOption that records version on the semver
// library being configured and hands the same library back.
func SemverVersion(version uint32) SemverOption {
	setVersion := func(target *semverLibType) *semverLibType {
		target.version = version
		return target
	}
	return setVersion
}
func (*semverLibType) LibraryName() string {
return "kubernetes.Semver"
@ -100,8 +131,8 @@ func (*semverLibType) Types() []*cel.Type {
return []*cel.Type{apiservercel.SemverType}
}
func (*semverLibType) declarations() map[string][]cel.FunctionOpt {
return map[string][]cel.FunctionOpt{
func (lib *semverLibType) declarations() map[string][]cel.FunctionOpt {
fnOpts := map[string][]cel.FunctionOpt{
"semver": {
cel.Overload("string_to_semver", []*cel.Type{cel.StringType}, apiservercel.SemverType, cel.UnaryBinding((stringToSemver))),
},
@ -127,6 +158,11 @@ func (*semverLibType) declarations() map[string][]cel.FunctionOpt {
cel.MemberOverload("semver_patch", []*cel.Type{apiservercel.SemverType}, cel.IntType, cel.UnaryBinding(semverPatch)),
},
}
if lib.version >= 1 {
fnOpts["semver"] = append(fnOpts["semver"], cel.Overload("string_bool_to_semver", []*cel.Type{cel.StringType, cel.BoolType}, apiservercel.SemverType, cel.BinaryBinding((stringToSemverNormalize))))
fnOpts["isSemver"] = append(fnOpts["isSemver"], cel.Overload("is_semver_string_bool", []*cel.Type{cel.StringType, cel.BoolType}, cel.BoolType, cel.BinaryBinding(isSemverNormalize)))
}
return fnOpts
}
func (s *semverLibType) CompileOptions() []cel.EnvOption {
@ -144,16 +180,29 @@ func (*semverLibType) ProgramOptions() []cel.ProgramOption {
}
// isSemver is the single-argument form of isSemverNormalize: it forwards arg
// with normalization switched off.
func isSemver(arg ref.Val) ref.Val {
	const normalize = false
	return isSemverNormalize(arg, types.Bool(normalize))
}
func isSemverNormalize(arg ref.Val, normalizeArg ref.Val) ref.Val {
str, ok := arg.Value().(string)
if !ok {
return types.MaybeNoSuchOverloadErr(arg)
}
normalize, ok := normalizeArg.Value().(bool)
if !ok {
return types.MaybeNoSuchOverloadErr(arg)
}
// Using semver/v4 here is okay because this function isn't
// used to validate the Kubernetes API. In the CEL base library
// we would have to use the regular expression from
// pkg/apis/resource/structured/namedresources/validation/validation.go.
_, err := semver.Parse(str)
var err error
if normalize {
_, err = normalizeAndParse(str)
} else {
_, err = semver.Parse(str)
}
if err != nil {
return types.Bool(false)
}
@ -162,17 +211,31 @@ func isSemver(arg ref.Val) ref.Val {
}
// stringToSemver is the single-argument form of stringToSemverNormalize: it
// forwards arg with normalization switched off.
func stringToSemver(arg ref.Val) ref.Val {
	const normalize = false
	return stringToSemverNormalize(arg, types.Bool(normalize))
}
func stringToSemverNormalize(arg ref.Val, normalizeArg ref.Val) ref.Val {
str, ok := arg.Value().(string)
if !ok {
return types.MaybeNoSuchOverloadErr(arg)
}
normalize, ok := normalizeArg.Value().(bool)
if !ok {
return types.MaybeNoSuchOverloadErr(arg)
}
// Using semver/v4 here is okay because this function isn't
// used to validate the Kubernetes API. In the CEL base library
// we would have to use the regular expression from
// pkg/apis/resource/structured/namedresources/validation/validation.go
// first before parsing.
v, err := semver.Parse(str)
var err error
var v semver.Version
if normalize {
v, err = normalizeAndParse(str)
} else {
v, err = semver.Parse(str)
}
if err != nil {
return types.WrapErr(err)
}
@ -245,3 +308,37 @@ func semverCompareTo(arg ref.Val, other ref.Val) ref.Val {
return types.Int(v.Compare(v2))
}
// normalizeAndParse parses s as a semantic version after normalizing it: a
// leading "v" is dropped, leading zeros are stripped from the dot-separated
// components, and missing minor and/or patch components are filled in as 0.
// A shortened version (fewer than three components) whose last component
// contains "+" or "-" is rejected with an error.
// normalizeAndParse is based on semver.ParseTolerant but does not trim extra
// whitespace and is guaranteed to not change behavior in the future.
func normalizeAndParse(s string) (semver.Version, error) {
	trimmed := strings.TrimPrefix(s, "v")

	// At most three pieces: major, minor, and patch together with any
	// pre-release/build suffix.
	fields := strings.SplitN(trimmed, ".", 3)

	for idx := range fields {
		field := fields[idx]
		if len(field) <= 1 {
			// An empty or single-character component has no leading zeros to strip.
			continue
		}
		stripped := strings.TrimLeft(field, "0")
		// Put one zero back when stripping left nothing, or left something
		// that no longer starts with a digit.
		if stripped == "" || stripped[0] < '0' || stripped[0] > '9' {
			stripped = "0" + stripped
		}
		fields[idx] = stripped
	}

	// Pad shortened versions out to major.minor.patch.
	if len(fields) < 3 {
		last := fields[len(fields)-1]
		if strings.ContainsAny(last, "+-") {
			return semver.Version{}, errors.New("short version cannot contain PreRelease/Build meta data")
		}
		for len(fields) < 3 {
			fields = append(fields, "0")
		}
	}

	return semver.Parse(strings.Join(fields, "."))
}

View File

@ -25,7 +25,7 @@ import (
"time"
"github.com/emicklei/go-restful/v3"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp" //nolint:depguard
apidiscoveryv2 "k8s.io/api/apidiscovery/v2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"

View File

@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/version"
apidiscoveryv2conversion "k8s.io/apiserver/pkg/apis/apidiscovery/v2"
genericfeatures "k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
@ -40,6 +41,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
)
@ -538,6 +540,14 @@ func (rdm *resourceDiscoveryManager) serveHTTP(resp http.ResponseWriter, req *ht
resp.WriteHeader(http.StatusInternalServerError)
return
}
if mediaType.Convert.GroupVersion() == apidiscoveryv2beta1.SchemeGroupVersion &&
utilfeature.DefaultFeatureGate.Enabled(genericfeatures.AggregatedDiscoveryRemoveBetaType) {
klog.Errorf("aggregated discovery version v2beta1 is removed. Please update to use v2")
resp.WriteHeader(http.StatusNotFound)
return
}
targetGV = mediaType.Convert.GroupVersion()
if len(etag) > 0 {

View File

@ -18,6 +18,9 @@ package aggregated
import (
"k8s.io/apimachinery/pkg/runtime/schema"
genericfeatures "k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
)
// Interface is from "k8s.io/apiserver/pkg/endpoints/handlers/negotiation"
@ -37,6 +40,9 @@ func (discoveryEndpointRestrictions) AllowsStreamSchema(s string) bool { return
// IsAggregatedDiscoveryGVK checks if a provided GVK is the GVK for serving aggregated discovery.
func IsAggregatedDiscoveryGVK(gvk *schema.GroupVersionKind) bool {
if gvk != nil {
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.AggregatedDiscoveryRemoveBetaType) {
return gvk.Group == "apidiscovery.k8s.io" && gvk.Version == "v2" && gvk.Kind == "APIGroupDiscoveryList"
}
return gvk.Group == "apidiscovery.k8s.io" && (gvk.Version == "v2beta1" || gvk.Version == "v2") && gvk.Kind == "APIGroupDiscoveryList"
}
return false

View File

@ -28,8 +28,6 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apiserver/pkg/endpoints/handlers/negotiation"
genericfeatures "k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
)
type WrappedHandler struct {
@ -39,13 +37,11 @@ type WrappedHandler struct {
}
func (wrapped *WrappedHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.AggregatedDiscoveryEndpoint) {
mediaType, _ := negotiation.NegotiateMediaTypeOptions(req.Header.Get("Accept"), wrapped.s.SupportedMediaTypes(), DiscoveryEndpointRestrictions)
// mediaType.Convert looks at the request accept headers and is used to control whether the discovery document will be aggregated.
if IsAggregatedDiscoveryGVK(mediaType.Convert) {
wrapped.aggHandler.ServeHTTP(resp, req)
return
}
mediaType, _ := negotiation.NegotiateMediaTypeOptions(req.Header.Get("Accept"), wrapped.s.SupportedMediaTypes(), DiscoveryEndpointRestrictions)
// mediaType.Convert looks at the request accept headers and is used to control whether the discovery document will be aggregated.
if IsAggregatedDiscoveryGVK(mediaType.Convert) {
wrapped.aggHandler.ServeHTTP(resp, req)
return
}
wrapped.handler.ServeHTTP(resp, req)
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package endpoints contains the generic code that provides a RESTful Kubernetes-style API service.
package endpoints // import "k8s.io/apiserver/pkg/endpoints"
package endpoints

View File

@ -68,6 +68,7 @@ func withAuthentication(handler http.Handler, auth authenticator.Request, failed
authenticationFinish := time.Now()
defer func() {
metrics(req.Context(), resp, ok, err, apiAuds, authenticationStart, authenticationFinish)
genericapirequest.TrackAuthenticationLatency(req.Context(), authenticationFinish.Sub(authenticationStart))
}()
if err != nil || !ok {
if err != nil {
@ -118,7 +119,6 @@ func withAuthentication(handler http.Handler, auth authenticator.Request, failed
// https://github.com/golang/net/commit/97aa3a539ec716117a9d15a4659a911f50d13c3c
w.Header().Set("Connection", "close")
}
req = req.WithContext(genericapirequest.WithUser(req.Context(), resp.User))
handler.ServeHTTP(w, req)
})

View File

@ -73,6 +73,7 @@ func withAuthorization(handler http.Handler, a authorizer.Authorizer, s runtime.
authorizationFinish := time.Now()
defer func() {
metrics(ctx, authorized, err, authorizationStart, authorizationFinish)
request.TrackAuthorizationLatency(ctx, authorizationFinish.Sub(authorizationStart))
}()
// an authorizer like RBAC could encounter evaluation errors and still allow the request, so authorizer decision is checked before error here.

View File

@ -18,4 +18,4 @@ limitations under the License.
// _are_ api related, i.e. which are prerequisite for the API services
// to work (in contrast to the filters in the server package which are
// not part of the API contract).
package filters // import "k8s.io/apiserver/pkg/endpoints/filters"
package filters

View File

@ -24,6 +24,7 @@ import (
"go.opentelemetry.io/otel/trace"
"k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/authentication/user"
tracing "k8s.io/component-base/tracing"
)
@ -31,7 +32,7 @@ import (
func WithTracing(handler http.Handler, tp trace.TracerProvider) http.Handler {
opts := []otelhttp.Option{
otelhttp.WithPropagators(tracing.Propagators()),
otelhttp.WithPublicEndpoint(),
otelhttp.WithPublicEndpointFn(notSystemPrivilegedGroup),
otelhttp.WithTracerProvider(tp),
otelhttp.WithSpanNameFormatter(func(operation string, r *http.Request) string {
ctx := r.Context()
@ -43,6 +44,11 @@ func WithTracing(handler http.Handler, tp trace.TracerProvider) http.Handler {
}),
}
wrappedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Adjust otelhttp tracing start time to match the start time used
// for Prometheus metrics.
if startTime, ok := request.ReceivedTimestampFrom(r.Context()); ok {
r = r.WithContext(otelhttp.ContextWithStartTime(r.Context(), startTime))
}
// Add the http.target attribute to the otelhttp span
// Workaround for https://github.com/open-telemetry/opentelemetry-go-contrib/issues/3743
if r.URL != nil {
@ -73,3 +79,14 @@ func getSpanNameFromRequestInfo(info *request.RequestInfo, r *http.Request) stri
}
return r.Method + " " + spanName
}
// notSystemPrivilegedGroup reports whether req lacks a privileged caller. It
// returns false only when the request context carries a user that belongs to
// user.SystemPrivilegedGroup or user.MonitoringGroup, and true otherwise,
// including when no user is attached to the context.
func notSystemPrivilegedGroup(req *http.Request) bool {
	requester, found := request.UserFrom(req.Context())
	if !found {
		return true
	}
	for _, name := range requester.GetGroups() {
		switch name {
		case user.SystemPrivilegedGroup, user.MonitoringGroup:
			return false
		}
	}
	return true
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package handlers contains HTTP handlers to implement the apiserver APIs.
package handlers // import "k8s.io/apiserver/pkg/endpoints/handlers"
package handlers

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package negotiation contains media type negotiation logic.
package negotiation // import "k8s.io/apiserver/pkg/endpoints/handlers/negotiation"
package negotiation

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package responsewriters contains helpers to write responses in HTTP handlers.
package responsewriters // import "k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
package responsewriters

View File

@ -157,6 +157,9 @@ const (
// (usually the entire object), and if the size is smaller no gzipping will be performed
// if the client requests it.
defaultGzipThresholdBytes = 128 * 1024
// Use the length of the first write to recognize streaming implementations.
// When streaming JSON, the first write is "{", while Kubernetes protobuf starts with a unique 4-byte header.
firstWriteStreamingThresholdBytes = 4
)
// negotiateContentEncoding returns a supported client-requested content encoding for the
@ -192,34 +195,62 @@ type deferredResponseWriter struct {
statusCode int
contentEncoding string
hasWritten bool
hw http.ResponseWriter
w io.Writer
hasBuffered bool
buffer []byte
hasWritten bool
hw http.ResponseWriter
w io.Writer
// totalBytes is the number of bytes written to `w` and does not include buffered bytes
totalBytes int
// lastWriteErr holds the error result (if any) of the last write attempt to `w`
lastWriteErr error
ctx context.Context
}
func (w *deferredResponseWriter) Write(p []byte) (n int, err error) {
ctx := w.ctx
span := tracing.SpanFromContext(ctx)
// This Step usually wraps in-memory object serialization.
span.AddEvent("About to start writing response", attribute.Int("size", len(p)))
switch {
case w.hasWritten:
// already written, cannot buffer
return w.unbufferedWrite(p)
firstWrite := !w.hasWritten
defer func() {
if err != nil {
span.AddEvent("Write call failed",
attribute.String("writer", fmt.Sprintf("%T", w.w)),
attribute.Int("size", len(p)),
attribute.Bool("firstWrite", firstWrite),
attribute.String("err", err.Error()))
} else {
span.AddEvent("Write call succeeded",
attribute.String("writer", fmt.Sprintf("%T", w.w)),
attribute.Int("size", len(p)),
attribute.Bool("firstWrite", firstWrite))
case w.contentEncoding != "gzip":
// non-gzip, no need to buffer
return w.unbufferedWrite(p)
case !w.hasBuffered && len(p) > defaultGzipThresholdBytes:
// not yet buffered, first write is long enough to trigger gzip, no need to buffer
return w.unbufferedWrite(p)
case !w.hasBuffered && len(p) > firstWriteStreamingThresholdBytes:
// not yet buffered, first write is longer than expected for streaming scenarios that would require buffering, no need to buffer
return w.unbufferedWrite(p)
default:
if !w.hasBuffered {
w.hasBuffered = true
// Start at 80 bytes to avoid rapid reallocation of the buffer.
// The minimum size of a 0-item serialized list object is 80 bytes:
// {"kind":"List","apiVersion":"v1","metadata":{"resourceVersion":"1"},"items":[]}\n
w.buffer = make([]byte, 0, max(80, len(p)))
}
w.buffer = append(w.buffer, p...)
var err error
if len(w.buffer) > defaultGzipThresholdBytes {
// we've accumulated enough to trigger gzip, write and clear buffer
_, err = w.unbufferedWrite(w.buffer)
w.buffer = nil
}
return len(p), err
}
}
func (w *deferredResponseWriter) unbufferedWrite(p []byte) (n int, err error) {
defer func() {
w.totalBytes += n
w.lastWriteErr = err
}()
if w.hasWritten {
return w.w.Write(p)
}
@ -240,16 +271,45 @@ func (w *deferredResponseWriter) Write(p []byte) (n int, err error) {
w.w = hw
}
span := tracing.SpanFromContext(w.ctx)
span.AddEvent("About to start writing response",
attribute.String("writer", fmt.Sprintf("%T", w.w)),
attribute.Int("size", len(p)),
)
header.Set("Content-Type", w.mediaType)
hw.WriteHeader(w.statusCode)
return w.w.Write(p)
}
func (w *deferredResponseWriter) Close() error {
func (w *deferredResponseWriter) Close() (err error) {
defer func() {
if !w.hasWritten {
return
}
span := tracing.SpanFromContext(w.ctx)
if w.lastWriteErr != nil {
span.AddEvent("Write call failed",
attribute.Int("size", w.totalBytes),
attribute.String("err", w.lastWriteErr.Error()))
} else {
span.AddEvent("Write call succeeded",
attribute.Int("size", w.totalBytes))
}
}()
if !w.hasWritten {
return nil
if !w.hasBuffered {
return nil
}
// never reached defaultGzipThresholdBytes, no need to do the gzip writer cleanup
_, err := w.unbufferedWrite(w.buffer)
w.buffer = nil
return err
}
var err error
switch t := w.w.(type) {
case *gzip.Writer:
err = t.Close()

View File

@ -17,4 +17,4 @@ limitations under the License.
// Package request contains everything around extracting info from
// a http request object.
// TODO: this package is temporary. Handlers must move into pkg/apiserver/handlers to avoid dependency cycle
package request // import "k8s.io/apiserver/pkg/endpoints/request"
package request

View File

@ -116,6 +116,12 @@ type LatencyTrackers struct {
// Validate webhooks are done in parallel, so max function is used.
ValidatingWebhookTracker DurationTracker
// AuthenticationTracker tracks the latency incurred by Authentication of request
AuthenticationTracker DurationTracker
// AuthorizationTracker tracks the latency incurred by Authorization of request
AuthorizationTracker DurationTracker
// APFQueueWaitTracker tracks the latency incurred by queue wait times
// from priority & fairness.
APFQueueWaitTracker DurationTracker
@ -179,6 +185,8 @@ func WithLatencyTrackersAndCustomClock(parent context.Context, c clock.Clock) co
return WithValue(parent, latencyTrackersKey, &LatencyTrackers{
MutatingWebhookTracker: newSumLatencyTracker(c),
ValidatingWebhookTracker: newMaxLatencyTracker(c),
AuthenticationTracker: newSumLatencyTracker(c),
AuthorizationTracker: newMaxLatencyTracker(c),
APFQueueWaitTracker: newMaxLatencyTracker(c),
StorageTracker: newSumLatencyTracker(c),
TransformTracker: newSumLatencyTracker(c),
@ -243,6 +251,22 @@ func TrackResponseWriteLatency(ctx context.Context, d time.Duration) {
}
}
// TrackAuthenticationLatency records d, the time spent in the authentication
// phase of a request, on the AuthenticationTracker of the latency trackers
// stored in ctx. It is a no-op when ctx carries no latency trackers.
func TrackAuthenticationLatency(ctx context.Context, d time.Duration) {
	trackers, found := LatencyTrackersFrom(ctx)
	if !found {
		return
	}
	trackers.AuthenticationTracker.TrackDuration(d)
}
// TrackAuthorizationLatency records d, the time spent in the authorization
// phase of a request, on the AuthorizationTracker of the latency trackers
// stored in ctx. It is a no-op when ctx carries no latency trackers.
func TrackAuthorizationLatency(ctx context.Context, d time.Duration) {
	trackers, found := LatencyTrackersFrom(ctx)
	if !found {
		return
	}
	trackers.AuthorizationTracker.TrackDuration(d)
}
// TrackAPFQueueWaitLatency is used to track latency incurred
// by priority and fairness queues.
func TrackAPFQueueWaitLatency(ctx context.Context, d time.Duration) {
@ -275,6 +299,8 @@ func AuditAnnotationsFromLatencyTrackers(ctx context.Context) map[string]string
validatingWebhookLatencyKey = "apiserver.latency.k8s.io/validating-webhook"
decodeLatencyKey = "apiserver.latency.k8s.io/decode-response-object"
apfQueueWaitLatencyKey = "apiserver.latency.k8s.io/apf-queue-wait"
authenticationLatencyKey = "apiserver.latency.k8s.io/authentication"
authorizationLatencyKey = "apiserver.latency.k8s.io/authorization"
)
tracker, ok := LatencyTrackersFrom(ctx)
@ -307,5 +333,11 @@ func AuditAnnotationsFromLatencyTrackers(ctx context.Context) map[string]string
if latency := tracker.APFQueueWaitTracker.GetLatency(); latency != 0 {
annotations[apfQueueWaitLatencyKey] = latency.String()
}
if latency := tracker.AuthenticationTracker.GetLatency(); latency != 0 {
annotations[authenticationLatencyKey] = latency.String()
}
if latency := tracker.AuthorizationTracker.GetLatency(); latency != 0 {
annotations[authorizationLatencyKey] = latency.String()
}
return annotations
}

View File

@ -34,19 +34,17 @@ const (
// of code conflicts because changes are more likely to be scattered
// across the file.
// owner: @ivelichkovich, @tallclair
// stable: v1.30
// kep: https://kep.k8s.io/3716
// owner: @jefftree
//
// Enables usage of MatchConditions fields to use CEL expressions for matching on admission webhooks
AdmissionWebhookMatchConditions featuregate.Feature = "AdmissionWebhookMatchConditions"
// Remove the v2beta1 apidiscovery.k8s.io/v2beta1 group version. Aggregated
// discovery implements its own handlers and follows a different lifecycle than
// traditional k8s resources.
AggregatedDiscoveryRemoveBetaType featuregate.Feature = "AggregatedDiscoveryRemoveBetaType"
// owner: @jefftree @alexzielenski
// stable: v1.30
// owner: @modulitos
//
// Enables an single HTTP endpoint /discovery/<version> which supports native HTTP
// caching with ETags containing all APIResources known to the apiserver.
AggregatedDiscoveryEndpoint featuregate.Feature = "AggregatedDiscoveryEndpoint"
// Allow user.DefaultInfo.UID to be set from x509 cert during cert auth.
AllowParsingUserUIDFromCertAuth featuregate.Feature = "AllowParsingUserUIDFromCertAuth"
// owner: @vinayakankugoyal
// kep: https://kep.k8s.io/4633
@ -63,13 +61,6 @@ const (
// resources using the Kubernetes API only.
AllowUnsafeMalformedObjectDeletion featuregate.Feature = "AllowUnsafeMalformedObjectDeletion"
// owner: @smarterclayton
// stable: 1.29
//
// Allow API clients to retrieve resource lists in chunks rather than
// all at once.
APIListChunking featuregate.Feature = "APIListChunking"
// owner: @ilackams
//
// Enables compression of REST responses (GET and LIST only)
@ -118,10 +109,6 @@ const (
// Enables coordinated leader election in the API server
CoordinatedLeaderElection featuregate.Feature = "CoordinatedLeaderElection"
//
// Allows for updating watchcache resource version with progress notify events.
EfficientWatchResumption featuregate.Feature = "EfficientWatchResumption"
// owner: @aramase
// kep: https://kep.k8s.io/3299
// deprecated: v1.28
@ -129,6 +116,12 @@ const (
// Enables KMS v1 API for encryption at rest.
KMSv1 featuregate.Feature = "KMSv1"
// owner: @serathius
// kep: https://kep.k8s.io/4988
//
// Enables generating snapshots of watch cache store and using them to serve LIST requests.
ListFromCacheSnapshot featuregate.Feature = "ListFromCacheSnapshot"
// owner: @alexzielenski, @cici37, @jiahuif, @jpbetz
// kep: https://kep.k8s.io/3962
//
@ -142,13 +135,6 @@ const (
// in the spec returned from kube-apiserver.
OpenAPIEnums featuregate.Feature = "OpenAPIEnums"
// owner: @caesarxuchao
// stable: 1.29
//
// Allow apiservers to show a count of remaining items in the response
// to a chunking list request.
RemainingItemCount featuregate.Feature = "RemainingItemCount"
// owner: @stlaz
//
// Enable kube-apiserver to accept UIDs via request header authentication.
@ -217,6 +203,14 @@ const (
// document.
StorageVersionHash featuregate.Feature = "StorageVersionHash"
// owner: @serathius
// Allow API server JSON encoder to encode collections item by item, instead of all at once.
StreamingCollectionEncodingToJSON featuregate.Feature = "StreamingCollectionEncodingToJSON"
// owner: @serathius
// Allow API server Protobuf encoder to encode collections item by item, instead of all at once.
StreamingCollectionEncodingToProtobuf featuregate.Feature = "StreamingCollectionEncodingToProtobuf"
// owner: @aramase, @enj, @nabokihms
// kep: https://kep.k8s.io/3331
//
@ -229,11 +223,6 @@ const (
// Enables Structured Authorization Configuration
StructuredAuthorizationConfiguration featuregate.Feature = "StructuredAuthorizationConfiguration"
// owner: @wojtek-t
//
// Enables support for watch bookmark events.
WatchBookmark featuregate.Feature = "WatchBookmark"
// owner: @wojtek-t
//
// Enables post-start-hook for storage readiness
@ -257,7 +246,6 @@ const (
)
func init() {
runtime.Must(utilfeature.DefaultMutableFeatureGate.Add(defaultKubernetesFeatureGates))
runtime.Must(utilfeature.DefaultMutableFeatureGate.AddVersioned(defaultVersionedKubernetesFeatureGates))
}
@ -268,16 +256,13 @@ func init() {
// Entries are alphabetized and separated from each other with blank lines to avoid sweeping gofmt changes
// when adding or removing one entry.
var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{
AdmissionWebhookMatchConditions: {
{Version: version.MustParse("1.27"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.28"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.30"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
AggregatedDiscoveryRemoveBetaType: {
{Version: version.MustParse("1.0"), Default: false, PreRelease: featuregate.GA},
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Deprecated},
},
AggregatedDiscoveryEndpoint: {
{Version: version.MustParse("1.26"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.27"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.30"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
AllowParsingUserUIDFromCertAuth: {
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
},
AllowUnsafeMalformedObjectDeletion: {
@ -289,12 +274,6 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta},
},
APIListChunking: {
{Version: version.MustParse("1.8"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.9"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.29"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
},
APIResponseCompression: {
{Version: version.MustParse("1.8"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.16"), Default: true, PreRelease: featuregate.Beta},
@ -316,6 +295,7 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
BtreeWatchCache: {
{Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
},
AuthorizeWithSelectors: {
@ -338,19 +318,19 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
CoordinatedLeaderElection: {
{Version: version.MustParse("1.31"), Default: false, PreRelease: featuregate.Alpha},
},
EfficientWatchResumption: {
{Version: version.MustParse("1.20"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.21"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.24"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
{Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Beta},
},
KMSv1: {
{Version: version.MustParse("1.0"), Default: true, PreRelease: featuregate.GA},
{Version: version.MustParse("1.28"), Default: true, PreRelease: featuregate.Deprecated},
{Version: version.MustParse("1.29"), Default: false, PreRelease: featuregate.Deprecated},
},
ListFromCacheSnapshot: {
{Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Alpha},
},
MutatingAdmissionPolicy: {
{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
},
@ -360,14 +340,9 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.24"), Default: true, PreRelease: featuregate.Beta},
},
RemainingItemCount: {
{Version: version.MustParse("1.15"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.16"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.29"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
},
RemoteRequestHeaderUID: {
{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
},
ResilientWatchCacheInitialization: {
@ -382,6 +357,7 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
SeparateCacheWatchRPC: {
{Version: version.MustParse("1.28"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Deprecated},
},
StorageVersionAPI: {
@ -393,6 +369,14 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.15"), Default: true, PreRelease: featuregate.Beta},
},
StreamingCollectionEncodingToJSON: {
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
},
StreamingCollectionEncodingToProtobuf: {
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
},
StrictCostEnforcementForVAP: {
{Version: version.MustParse("1.30"), Default: false, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
@ -419,26 +403,19 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.29"), Default: true, PreRelease: featuregate.Beta},
},
WatchBookmark: {
{Version: version.MustParse("1.15"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.16"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.17"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
},
WatchCacheInitializationPostStartHook: {
{Version: version.MustParse("1.31"), Default: false, PreRelease: featuregate.Beta},
},
WatchFromStorageWithoutResourceVersion: {
{Version: version.MustParse("1.27"), Default: false, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Deprecated, LockToDefault: true},
},
WatchList: {
{Version: version.MustParse("1.27"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta},
// switch this back to false because the json and proto streaming encoders appear to work better.
{Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Beta},
},
}
// defaultKubernetesFeatureGates consists of legacy unversioned Kubernetes-specific feature keys.
// Please do not add to this struct and use defaultVersionedKubernetesFeatureGates instead.
var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{}

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package generic provides a generic object store interface and a
// generic label/field matching type.
package generic // import "k8s.io/apiserver/pkg/registry/generic"
package generic

View File

@ -107,8 +107,14 @@ func (d *corruptObjectDeleter) Delete(ctx context.Context, name string, deleteVa
klog.FromContext(ctx).V(1).Info("Going to perform unsafe object deletion", "object", klog.KRef(genericapirequest.NamespaceValue(ctx), name))
out := d.store.NewFunc()
storageOpts := storage.DeleteOptions{IgnoreStoreReadError: true}
// dropping preconditions, and keeping the admission
if err := storageBackend.Delete(ctx, key, out, nil, storage.ValidateObjectFunc(deleteValidation), nil, storageOpts); err != nil {
// we don't have the old object in the cache, neither can it be
// retrieved from the storage and decoded into an object
// successfully, so we do the following:
// a) skip preconditions check
// b) skip admission validation, rest.ValidateAllObjectFunc will "admit everything"
var nilPreconditions *storage.Preconditions = nil
var nilCachedExistingObject runtime.Object = nil
if err := storageBackend.Delete(ctx, key, out, nilPreconditions, rest.ValidateAllObjectFunc, nilCachedExistingObject, storageOpts); err != nil {
if storage.IsNotFound(err) {
// the DELETE succeeded, but we don't have the object since it's
// not retrievable from the storage, so we send a nil object

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package etcd has a generic implementation of a registry that
// stores things in etcd.
package registry // import "k8s.io/apiserver/pkg/registry/generic/registry"
package registry

View File

@ -54,31 +54,34 @@ func StorageWithCacher() generic.StorageDecorator {
}
cacherConfig := cacherstorage.Config{
Storage: s,
Versioner: storage.APIObjectVersioner{},
GroupResource: storageConfig.GroupResource,
ResourcePrefix: resourcePrefix,
KeyFunc: keyFunc,
NewFunc: newFunc,
NewListFunc: newListFunc,
GetAttrsFunc: getAttrsFunc,
IndexerFuncs: triggerFuncs,
Indexers: indexers,
Codec: storageConfig.Codec,
Storage: s,
Versioner: storage.APIObjectVersioner{},
GroupResource: storageConfig.GroupResource,
EventsHistoryWindow: storageConfig.EventsHistoryWindow,
ResourcePrefix: resourcePrefix,
KeyFunc: keyFunc,
NewFunc: newFunc,
NewListFunc: newListFunc,
GetAttrsFunc: getAttrsFunc,
IndexerFuncs: triggerFuncs,
Indexers: indexers,
Codec: storageConfig.Codec,
}
cacher, err := cacherstorage.NewCacherFromConfig(cacherConfig)
if err != nil {
return nil, func() {}, err
}
delegator := cacherstorage.NewCacheDelegator(cacher, s)
var once sync.Once
destroyFunc := func() {
once.Do(func() {
delegator.Stop()
cacher.Stop()
d()
})
}
return cacher, destroyFunc, nil
return delegator, destroyFunc, nil
}
}

View File

@ -1240,7 +1240,6 @@ func (e *Store) DeleteCollection(ctx context.Context, deleteValidation rest.Vali
var items []runtime.Object
// TODO(wojtek-t): Decide if we don't want to start workers more opportunistically.
workersNumber := e.DeleteCollectionWorkers
if workersNumber < 1 {
workersNumber = 1

View File

@ -126,9 +126,22 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx context.Context, obj runtime.
if period >= *objectMeta.GetDeletionGracePeriodSeconds() {
return false, true, nil
}
// Move the existing deletionTimestamp back by existing object.DeletionGracePeriod, then forward by options.DeletionGracePeriod.
// This moves the deletionTimestamp back, since the grace period can only be shortened in this code path.
newDeletionTimestamp := metav1.NewTime(
objectMeta.GetDeletionTimestamp().Add(-time.Second * time.Duration(*objectMeta.GetDeletionGracePeriodSeconds())).
Add(time.Second * time.Duration(*options.GracePeriodSeconds)))
// Prevent shortening the grace period moving deletionTimestamp into the past
if now := metav1Now(); newDeletionTimestamp.Before(&now) {
newDeletionTimestamp = now
if period != 0 {
// Since a graceful deletion was requested (options.GracePeriodSeconds != 0), but the entire grace period has already expired,
// shorten to the minimum period possible while still treating this as a graceful delete.
// This means the API server updates the object, another actor observes the update
// and is still responsible for the final delete with options.GracePeriodSeconds == 0.
period = int64(1)
}
}
objectMeta.SetDeletionTimestamp(&newDeletionTimestamp)
objectMeta.SetDeletionGracePeriodSeconds(&period)
return true, false, nil
@ -147,8 +160,8 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx context.Context, obj runtime.
return false, false, errors.NewInternalError(fmt.Errorf("options.GracePeriodSeconds should not be nil"))
}
now := metav1.NewTime(metav1.Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds)))
objectMeta.SetDeletionTimestamp(&now)
requestedDeletionTimestamp := metav1.NewTime(metav1Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds)))
objectMeta.SetDeletionTimestamp(&requestedDeletionTimestamp)
objectMeta.SetDeletionGracePeriodSeconds(options.GracePeriodSeconds)
// If it's the first graceful deletion we are going to set the DeletionTimestamp to non-nil.
// Controllers of the object that's being deleted shouldn't take any nontrivial actions, hence its behavior changes.

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package rest defines common logic around changes to Kubernetes-style resources.
package rest // import "k8s.io/apiserver/pkg/registry/rest"
package rest

View File

@ -23,6 +23,9 @@ import (
"k8s.io/apimachinery/pkg/util/uuid"
)
// metav1Now yields the current time as a metav1.Time. It is held in a
// package-level variable (defaulting to metav1.Now) so that unit tests can
// override it.
var metav1Now = metav1.Now
// WipeObjectMetaSystemFields erases fields that are managed by the system on ObjectMeta.
func WipeObjectMetaSystemFields(meta metav1.Object) {
meta.SetCreationTimestamp(metav1.Time{})
@ -34,7 +37,7 @@ func WipeObjectMetaSystemFields(meta metav1.Object) {
// FillObjectMetaSystemFields populates fields that are managed by the system on ObjectMeta.
func FillObjectMetaSystemFields(meta metav1.Object) {
meta.SetCreationTimestamp(metav1.Now())
meta.SetCreationTimestamp(metav1Now())
meta.SetUID(uuid.NewUUID())
}

View File

@ -0,0 +1,321 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rest
import (
"context"
"fmt"
"strings"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
validationmetrics "k8s.io/apiserver/pkg/validation"
"k8s.io/klog/v2"
)
// ValidateDeclaratively runs the declarative validation registered in scheme
// against obj and returns the resulting errors.
//
// The ctx MUST carry a requestInfo: its APIGroup and APIVersion select the
// version obj is converted to (via scheme) before validation, and its
// Subresource path is split into segments that are passed along to
// scheme.Validate. The scheme MUST have declarative validation registered for
// the requested resource/subresource.
//
// options should contain any validation options that the declarative
// validation tags expect.
//
// A single internal error is returned, and no validation is performed, when
// requestInfo is missing from ctx, when the version conversion fails, or when
// the subresource path is malformed.
func ValidateDeclaratively(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj runtime.Object) field.ErrorList {
	requestInfo, found := genericapirequest.RequestInfoFrom(ctx)
	if !found {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("could not find requestInfo in context"))}
	}

	targetGV := schema.GroupVersion{Group: requestInfo.APIGroup, Version: requestInfo.APIVersion}
	versionedObj, err := scheme.ConvertToVersion(obj, targetGV)
	if err != nil {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("unexpected error converting to versioned type: %w", err))}
	}

	subresources, err := parseSubresourcePath(requestInfo.Subresource)
	if err != nil {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("unexpected error parsing subresource path: %w", err))}
	}

	return scheme.Validate(ctx, options, versionedObj, subresources...)
}
// ValidateUpdateDeclaratively runs the declarative update validation
// registered in scheme against obj and oldObj and returns the resulting errors.
//
// The ctx MUST carry a requestInfo: its APIGroup and APIVersion select the
// version both obj and oldObj are converted to (via scheme) before validation,
// and its Subresource path is split into segments that are passed along to
// scheme.ValidateUpdate. The scheme MUST have declarative validation
// registered for the requested resource/subresource.
//
// options should contain any validation options that the declarative
// validation tags expect.
//
// A single internal error is returned, and no validation is performed, when
// requestInfo is missing from ctx, when either version conversion fails, or
// when the subresource path is malformed.
func ValidateUpdateDeclaratively(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj, oldObj runtime.Object) field.ErrorList {
	requestInfo, found := genericapirequest.RequestInfoFrom(ctx)
	if !found {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("could not find requestInfo in context"))}
	}

	targetGV := schema.GroupVersion{Group: requestInfo.APIGroup, Version: requestInfo.APIVersion}

	// Convert the new object first, then the old one, so both are validated
	// in the version the request was made against.
	versionedObj, err := scheme.ConvertToVersion(obj, targetGV)
	if err != nil {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("unexpected error converting to versioned type: %w", err))}
	}
	versionedOldObj, err := scheme.ConvertToVersion(oldObj, targetGV)
	if err != nil {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("unexpected error converting to versioned type: %w", err))}
	}

	subresources, err := parseSubresourcePath(requestInfo.Subresource)
	if err != nil {
		return field.ErrorList{field.InternalError(nil, fmt.Errorf("unexpected error parsing subresource path: %w", err))}
	}

	return scheme.ValidateUpdate(ctx, options, versionedObj, versionedOldObj, subresources...)
}
// parseSubresourcePath splits a "/"-separated subresource path into its
// segments. An empty path yields (nil, nil). A path containing an empty
// segment (leading, trailing or doubled "/") is rejected with an error.
func parseSubresourcePath(subresourcePath string) ([]string, error) {
	if subresourcePath == "" {
		return nil, nil
	}
	segments := strings.Split(subresourcePath, "/")
	for i := range segments {
		if segments[i] == "" {
			return nil, fmt.Errorf("invalid subresource path: %s", subresourcePath)
		}
	}
	return segments, nil
}
// CompareDeclarativeErrorsAndEmitMismatches compares the errors produced by
// imperative (hand written) validation with those produced by declarative
// validation. For every mismatch found it logs the detail through the
// contextual logger from ctx and increments the declarative validation
// mismatch metric. takeover only influences the wording of the logged
// recommendation.
func CompareDeclarativeErrorsAndEmitMismatches(ctx context.Context, imperativeErrs, declarativeErrs field.ErrorList, takeover bool) {
	mismatches := gatherDeclarativeValidationMismatches(imperativeErrs, declarativeErrs, takeover)
	if len(mismatches) == 0 {
		return
	}
	logger := klog.FromContext(ctx)
	for i := range mismatches {
		// One log line and one metric increment per mismatch.
		logger.Error(nil, mismatches[i])
		validationmetrics.Metrics.IncDeclarativeValidationMismatchMetric()
	}
}
// gatherDeclarativeValidationMismatches compares imperative and declarative
// validation errors and returns one human-readable message per mismatch.
//
// Errors are paired with a matcher that compares type, field and origin.
// Two kinds of mismatch are reported:
//   - an imperative error marked CoveredByDeclarative with no matching
//     declarative error ("unmatched"), and
//   - a declarative error not matched by any such imperative error ("extra").
//
// takeover selects the recommendation appended to each message. The result is
// nil when both input lists are empty.
func gatherDeclarativeValidationMismatches(imperativeErrs, declarativeErrs field.ErrorList, takeover bool) []string {
	var mismatchDetails []string
	// Fast path for the usual case of no validation errors at all: avoid any allocation.
	if len(imperativeErrs) == 0 && len(declarativeErrs) == 0 {
		return mismatchDetails
	}

	// The advice given to the operator depends on which validator is authoritative.
	var recommendation string
	if takeover {
		recommendation = "Consider disabling the DeclarativeValidationTakeover feature gate to keep data persisted in etcd consistent with prior versions of Kubernetes."
	} else {
		recommendation = "This difference should not affect system operation since hand written validation is authoritative."
	}

	fuzzyMatcher := field.ErrorMatcher{}.ByType().ByField().ByOrigin().RequireOriginWhenInvalid()
	exactMatcher := field.ErrorMatcher{}.Exactly()

	// Drop exact duplicates among the imperative errors. They are unintended
	// by-products of (buggy) repeated validation calls; left in place, the
	// second copy would be reported as unmatched even though the first copy
	// already consumed the corresponding declarative errors.
	uniqueImperative := field.ErrorList{}
nextImperative:
	for _, candidate := range imperativeErrs {
		for _, seen := range uniqueImperative {
			if exactMatcher.Matches(seen, candidate) {
				continue nextImperative
			}
		}
		uniqueImperative = append(uniqueImperative, candidate)
	}

	// Work on a private copy of the declarative errors; entries are removed
	// from it as they get matched.
	unmatchedDeclarative := make(field.ErrorList, len(declarativeErrs))
	copy(unmatchedDeclarative, declarativeErrs)

	// For every imperative error that declarative validation claims to cover,
	// consume ALL fuzzy-matching declarative errors (a one-to-many mapping).
	// An imperative error that consumes nothing is a mismatch.
	for _, iErr := range uniqueImperative {
		if !iErr.CoveredByDeclarative {
			continue
		}

		kept := make(field.ErrorList, 0, len(unmatchedDeclarative))
		matched := false
		for _, dErr := range unmatchedDeclarative {
			if !fuzzyMatcher.Matches(iErr, dErr) {
				kept = append(kept, dErr)
				continue
			}
			matched = true
		}
		unmatchedDeclarative = kept

		if !matched {
			mismatchDetails = append(mismatchDetails,
				fmt.Sprintf(
					"Unexpected difference between hand written validation and declarative validation error results, unmatched error(s) found %s. "+
						"This indicates an issue with declarative validation. %s",
					fuzzyMatcher.Render(iErr),
					recommendation,
				),
			)
		}
	}

	// Whatever declarative errors were never consumed have no imperative counterpart.
	for _, dErr := range unmatchedDeclarative {
		mismatchDetails = append(mismatchDetails,
			fmt.Sprintf(
				"Unexpected difference between hand written validation and declarative validation error results, extra error(s) found %s. "+
					"This indicates an issue with declarative validation. %s",
				fuzzyMatcher.Render(dErr),
				recommendation,
			),
		)
	}

	return mismatchDetails
}
// createDeclarativeValidationPanicHandler builds a function intended to be
// deferred around a declarative validation call. When the returned function
// recovers a panic it increments the declarative validation panic metric and
// then either appends an internal error to *errs (takeover=true) or logs the
// panic through the contextual logger from ctx (takeover=false). It does
// nothing when there is no panic to recover.
func createDeclarativeValidationPanicHandler(ctx context.Context, errs *field.ErrorList, takeover bool) func() {
	const errorFmt = "panic during declarative validation: %v"
	logger := klog.FromContext(ctx)
	return func() {
		// recover must be called directly by the deferred function to take effect.
		r := recover()
		if r == nil {
			return
		}
		validationmetrics.Metrics.IncDeclarativeValidationPanicMetric()
		if !takeover {
			// Declarative validation is not authoritative: only log the panic.
			logger.Error(nil, fmt.Sprintf(errorFmt, r))
			return
		}
		// The authoritative validator panicked, so validation itself must fail.
		*errs = append(*errs, field.InternalError(nil, fmt.Errorf(errorFmt, r)))
	}
}
// panicSafeValidateFunc decorates a validation function that has the ValidateDeclaratively
// signature with panic recovery and returns a function with that same signature.
// A panic raised by validateFunc is handled by the closure obtained from
// createDeclarativeValidationPanicHandler: the panic metric is incremented, and the panic
// is logged as an error when takeover=false or added as a validation error when takeover=true.
func panicSafeValidateFunc(
	validateFunc func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj runtime.Object) field.ErrorList,
	takeover bool,
) func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj runtime.Object) field.ErrorList {
	return func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj runtime.Object) (errs field.ErrorList) {
		// errs is a named result so the deferred handler can still append to it after a panic.
		handlePanic := createDeclarativeValidationPanicHandler(ctx, &errs, takeover)
		defer handlePanic()

		errs = validateFunc(ctx, options, scheme, obj)
		return errs
	}
}
// panicSafeValidateUpdateFunc decorates an update validation function that has the
// ValidateUpdateDeclaratively signature with panic recovery and returns a function with
// that same signature.
// A panic raised by validateUpdateFunc is handled by the closure obtained from
// createDeclarativeValidationPanicHandler: the panic metric is incremented, and the panic
// is logged as an error when takeover=false or added as a validation error when takeover=true.
func panicSafeValidateUpdateFunc(
	validateUpdateFunc func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj, oldObj runtime.Object) field.ErrorList,
	takeover bool,
) func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj, oldObj runtime.Object) field.ErrorList {
	return func(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj, oldObj runtime.Object) (errs field.ErrorList) {
		// errs is a named result so the deferred handler can still append to it after a panic.
		handlePanic := createDeclarativeValidationPanicHandler(ctx, &errs, takeover)
		defer handlePanic()

		errs = validateUpdateFunc(ctx, options, scheme, obj, oldObj)
		return errs
	}
}
// ValidateDeclarativelyWithRecovery validates obj against declarative validation tags
// with panic recovery logic. It uses the API version extracted from ctx and the
// provided scheme for validation.
//
// The ctx MUST contain requestInfo, which determines the target API for
// validation. The obj is converted to the API version using the provided scheme
// before validation occurs. The scheme MUST have the declarative validation
// registered for the requested resource/subresource.
//
// options should contain any validation options that the declarative validation
// tags expect.
//
// takeover determines whether a recovered panic is returned as a validation error
// (true) or is only logged as an error (false). In both cases the declarative
// validation panic metric is incremented.
//
// Returns a field.ErrorList containing any validation errors. An internal error
// is included if requestInfo is missing from the context, if version
// conversion fails, or if a panic occurs during validation when
// takeover is true.
func ValidateDeclarativelyWithRecovery(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj runtime.Object, takeover bool) field.ErrorList {
// Wrap ValidateDeclaratively with the panic handler, then invoke the wrapped function immediately.
return panicSafeValidateFunc(ValidateDeclaratively, takeover)(ctx, options, scheme, obj)
}
// ValidateUpdateDeclarativelyWithRecovery validates obj and oldObj against declarative
// validation tags with panic recovery logic. It uses the API version extracted from
// ctx and the provided scheme for validation.
//
// The ctx MUST contain requestInfo, which determines the target API for
// validation. The obj is converted to the API version using the provided scheme
// before validation occurs. The scheme MUST have the declarative validation
// registered for the requested resource/subresource.
//
// options should contain any validation options that the declarative validation
// tags expect.
//
// takeover determines whether a recovered panic is returned as a validation error
// (true) or is only logged as an error (false). In both cases the declarative
// validation panic metric is incremented.
//
// Returns a field.ErrorList containing any validation errors. An internal error
// is included if requestInfo is missing from the context, if version
// conversion fails, or if a panic occurs during validation when
// takeover is true.
func ValidateUpdateDeclarativelyWithRecovery(ctx context.Context, options sets.Set[string], scheme *runtime.Scheme, obj, oldObj runtime.Object, takeover bool) field.ErrorList {
// Wrap ValidateUpdateDeclaratively with the panic handler, then invoke the wrapped function immediately.
return panicSafeValidateUpdateFunc(ValidateUpdateDeclaratively, takeover)(ctx, options, scheme, obj, oldObj)
}

View File

@ -73,12 +73,12 @@ import (
flowcontrolrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
"k8s.io/client-go/informers"
restclient "k8s.io/client-go/rest"
basecompatibility "k8s.io/component-base/compatibility"
"k8s.io/component-base/featuregate"
"k8s.io/component-base/logs"
"k8s.io/component-base/metrics/features"
"k8s.io/component-base/metrics/prometheus/slis"
"k8s.io/component-base/tracing"
utilversion "k8s.io/component-base/version"
"k8s.io/component-base/zpages/flagz"
"k8s.io/klog/v2"
openapicommon "k8s.io/kube-openapi/pkg/common"
@ -153,7 +153,17 @@ type Config struct {
// EffectiveVersion determines which apis and features are available
// based on the api/feature lifecycle.
EffectiveVersion utilversion.EffectiveVersion
EffectiveVersion basecompatibility.EffectiveVersion
// EmulationForwardCompatible is an option to implicitly enable all APIs which are introduced after the emulation version and
// have higher priority than APIs of the same group resource enabled at the emulation version.
// If true, all APIs that have higher priority than the APIs(beta+) of the same group resource enabled at the emulation version will be installed.
// This is needed when a controller implementation migrates to newer API versions, for the binary version, and also uses the newer API versions even when emulation version is set.
// Not applicable to alpha APIs.
EmulationForwardCompatible bool
// RuntimeConfigEmulationForwardCompatible is an option to explicitly enable specific APIs introduced after the emulation version through the runtime-config.
// If true, APIs identified by group/version that are enabled in the --runtime-config flag will be installed even if it is introduced after the emulation version. --runtime-config flag values that identify multiple APIs, such as api/all,api/ga,api/beta, are not influenced by this flag and will only enable APIs available at the current emulation version.
// If false, error would be thrown if any GroupVersion or GroupVersionResource explicitly enabled in the --runtime-config flag is introduced after the emulation version.
RuntimeConfigEmulationForwardCompatible bool
// FeatureGate is a way to plumb feature gate through if you have them.
FeatureGate featuregate.FeatureGate
// AuditBackend is where audit events are sent to.
@ -839,20 +849,20 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G
StorageReadinessHook: NewStorageReadinessHook(c.StorageInitializationTimeout),
StorageVersionManager: c.StorageVersionManager,
EffectiveVersion: c.EffectiveVersion,
FeatureGate: c.FeatureGate,
EffectiveVersion: c.EffectiveVersion,
EmulationForwardCompatible: c.EmulationForwardCompatible,
RuntimeConfigEmulationForwardCompatible: c.RuntimeConfigEmulationForwardCompatible,
FeatureGate: c.FeatureGate,
muxAndDiscoveryCompleteSignals: map[string]<-chan struct{}{},
}
if c.FeatureGate.Enabled(genericfeatures.AggregatedDiscoveryEndpoint) {
manager := c.AggregatedDiscoveryGroupManager
if manager == nil {
manager = discoveryendpoint.NewResourceManager("apis")
}
s.AggregatedDiscoveryGroupManager = manager
s.AggregatedLegacyDiscoveryGroupManager = discoveryendpoint.NewResourceManager("api")
manager := c.AggregatedDiscoveryGroupManager
if manager == nil {
manager = discoveryendpoint.NewResourceManager("apis")
}
s.AggregatedDiscoveryGroupManager = manager
s.AggregatedLegacyDiscoveryGroupManager = discoveryendpoint.NewResourceManager("api")
for {
if c.JSONPatchMaxCopyBytes <= 0 {
break
@ -1030,6 +1040,11 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler {
failedHandler := genericapifilters.Unauthorized(c.Serializer)
failedHandler = genericapifilters.WithFailedAuthenticationAudit(failedHandler, c.AuditBackend, c.AuditPolicyRuleEvaluator)
// WithTracing comes after authentication so we can allow authenticated
// clients to influence sampling.
if c.FeatureGate.Enabled(genericfeatures.APIServerTracing) {
handler = genericapifilters.WithTracing(handler, c.TracerProvider)
}
failedHandler = filterlatency.TrackCompleted(failedHandler)
handler = filterlatency.TrackCompleted(handler)
handler = genericapifilters.WithAuthentication(handler, c.Authentication.Authenticator, failedHandler, c.Authentication.APIAudiences, c.Authentication.RequestHeaderConfig)
@ -1060,9 +1075,6 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler {
handler = genericfilters.WithRetryAfter(handler, c.lifecycleSignals.NotAcceptingNewRequest.Signaled())
}
handler = genericfilters.WithHTTPLogging(handler)
if c.FeatureGate.Enabled(genericfeatures.APIServerTracing) {
handler = genericapifilters.WithTracing(handler, c.TracerProvider)
}
handler = genericapifilters.WithLatencyTrackers(handler)
// WithRoutine will execute future handlers in a separate goroutine and serving
// handler in current goroutine to minimize the stack memory usage. It must be
@ -1108,15 +1120,11 @@ func installAPI(name string, s *GenericAPIServer, c *Config) {
}
}
routes.Version{Version: c.EffectiveVersion.BinaryVersion().Info()}.Install(s.Handler.GoRestfulContainer)
routes.Version{Version: c.EffectiveVersion.Info()}.Install(s.Handler.GoRestfulContainer)
if c.EnableDiscovery {
if c.FeatureGate.Enabled(genericfeatures.AggregatedDiscoveryEndpoint) {
wrapped := discoveryendpoint.WrapAggregatedDiscoveryToHandler(s.DiscoveryGroupManager, s.AggregatedDiscoveryGroupManager)
s.Handler.GoRestfulContainer.Add(wrapped.GenerateWebService("/apis", metav1.APIGroupList{}))
} else {
s.Handler.GoRestfulContainer.Add(s.DiscoveryGroupManager.WebService())
}
wrapped := discoveryendpoint.WrapAggregatedDiscoveryToHandler(s.DiscoveryGroupManager, s.AggregatedDiscoveryGroupManager)
s.Handler.GoRestfulContainer.Add(wrapped.GenerateWebService("/apis", metav1.APIGroupList{}))
}
if c.FlowControl != nil {
c.FlowControl.Install(s.Handler.NonGoRestfulMux)

View File

@ -19,6 +19,8 @@ package server
import (
"fmt"
"os"
"regexp"
"sort"
"strconv"
"strings"
@ -27,16 +29,26 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
apimachineryversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/version"
"k8s.io/apiserver/pkg/registry/rest"
serverstorage "k8s.io/apiserver/pkg/server/storage"
"k8s.io/klog/v2"
)
// alphaPattern matches alpha API version names of the form v<digits>alpha<digits>,
// for example "v1alpha1".
var alphaPattern = regexp.MustCompile(`^v\d+alpha\d+$`)
// resourceExpirationEvaluator holds info for deciding if a particular rest.Storage needs to be excluded from the API
type resourceExpirationEvaluator struct {
currentVersion *apimachineryversion.Version
isAlpha bool
currentVersion *apimachineryversion.Version
emulationForwardCompatible bool
runtimeConfigEmulationForwardCompatible bool
isAlpha bool
// Special flag checking for the existence of alpha.0
// alpha.0 is a special case where everything merged to master is auto propagated to the release-1.n branch
isAlphaZero bool
// This is usually set for testing for which tests need to be removed. This prevent insta-failing CI.
// Set KUBE_APISERVER_STRICT_REMOVED_API_HANDLING_IN_ALPHA to see what will be removed when we tag beta
// This flag only takes effect during alpha but not alphaZero.
strictRemovedHandlingInAlpha bool
// This is usually set by a cluster-admin looking for a short-term escape hatch after something bad happened.
// This should be made a flag before merge
@ -46,24 +58,51 @@ type resourceExpirationEvaluator struct {
// ResourceExpirationEvaluator indicates whether or not a resource should be served.
type ResourceExpirationEvaluator interface {
// RemoveDeletedKinds inspects the storage map and modifies it in place by removing storage for kinds that have been deleted.
// RemoveUnavailableKinds inspects the storage map and modifies it in place by removing storage for kinds that have been deleted or are introduced after the current version.
// versionedResourcesStorageMap mirrors the field on APIGroupInfo, it's a map from version to resource to the storage.
RemoveDeletedKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage)
RemoveUnavailableKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage, apiResourceConfigSource serverstorage.APIResourceConfigSource) error
// ShouldServeForVersion returns true if a particular version cut off is after the current version
ShouldServeForVersion(majorRemoved, minorRemoved int) bool
}
// ResourceExpirationEvaluatorOptions carries the inputs consumed by
// NewResourceExpirationEvaluatorFromOptions when building a ResourceExpirationEvaluator.
type ResourceExpirationEvaluatorOptions struct {
// CurrentVersion is the current version of the apiserver.
// It must be non-nil; NewResourceExpirationEvaluatorFromOptions returns an error otherwise.
CurrentVersion *apimachineryversion.Version
// Prerelease holds an optional prerelease portion of the version.
// This is used to determine if the current binary is an alpha.
Prerelease string
// EmulationForwardCompatible indicates whether the apiserver should serve resources that are introduced after the current version,
// when resources of the same group and resource name but with lower priority are served.
// Not applicable to alpha APIs.
EmulationForwardCompatible bool
// RuntimeConfigEmulationForwardCompatible indicates whether the apiserver should serve resources that are introduced after the current version,
// when the resource is explicitly enabled in runtime-config.
RuntimeConfigEmulationForwardCompatible bool
}
// NewResourceExpirationEvaluator builds a ResourceExpirationEvaluator for currentVersion.
// Only the major and minor components of currentVersion are used as the evaluator's
// version; its prerelease portion is passed along separately so alpha builds can be
// detected. Both forward-compatibility options are left at their zero value (false).
//
// It returns an error when currentVersion is nil.
func NewResourceExpirationEvaluator(currentVersion *apimachineryversion.Version) (ResourceExpirationEvaluator, error) {
	// Guard before dereferencing: Major/Minor/PreRelease are called on currentVersion below,
	// so a nil version must be rejected here rather than relying on the options constructor.
	if currentVersion == nil {
		return nil, fmt.Errorf("empty NewResourceExpirationEvaluator currentVersion")
	}
	opts := ResourceExpirationEvaluatorOptions{
		CurrentVersion: apimachineryversion.MajorMinor(currentVersion.Major(), currentVersion.Minor()),
		Prerelease:     currentVersion.PreRelease(),
	}
	return NewResourceExpirationEvaluatorFromOptions(opts)
}
func NewResourceExpirationEvaluatorFromOptions(opts ResourceExpirationEvaluatorOptions) (ResourceExpirationEvaluator, error) {
currentVersion := opts.CurrentVersion
if currentVersion == nil {
return nil, fmt.Errorf("empty NewResourceExpirationEvaluator currentVersion")
}
klog.V(1).Infof("NewResourceExpirationEvaluator with currentVersion: %s.", currentVersion)
ret := &resourceExpirationEvaluator{
strictRemovedHandlingInAlpha: false,
strictRemovedHandlingInAlpha: false,
emulationForwardCompatible: opts.EmulationForwardCompatible,
runtimeConfigEmulationForwardCompatible: opts.RuntimeConfigEmulationForwardCompatible,
}
// Only keeps the major and minor versions from input version.
ret.currentVersion = apimachineryversion.MajorMinor(currentVersion.Major(), currentVersion.Minor())
ret.isAlpha = strings.Contains(currentVersion.PreRelease(), "alpha")
ret.isAlpha = strings.Contains(opts.Prerelease, "alpha")
ret.isAlphaZero = strings.Contains(opts.Prerelease, "alpha.0")
if envString, ok := os.LookupEnv("KUBE_APISERVER_STRICT_REMOVED_API_HANDLING_IN_ALPHA"); !ok {
// do nothing
@ -84,7 +123,8 @@ func NewResourceExpirationEvaluator(currentVersion *apimachineryversion.Version)
return ret, nil
}
func (e *resourceExpirationEvaluator) shouldServe(gv schema.GroupVersion, versioner runtime.ObjectVersioner, resourceServingInfo rest.Storage) bool {
// isNotRemoved checks if a resource is removed due to the APILifecycleRemoved information.
func (e *resourceExpirationEvaluator) isNotRemoved(gv schema.GroupVersion, versioner runtime.ObjectVersioner, resourceServingInfo rest.Storage) bool {
internalPtr := resourceServingInfo.New()
target := gv
@ -99,15 +139,6 @@ func (e *resourceExpirationEvaluator) shouldServe(gv schema.GroupVersion, versio
return false
}
introduced, ok := versionedPtr.(introducedInterface)
if ok {
majorIntroduced, minorIntroduced := introduced.APILifecycleIntroduced()
verIntroduced := apimachineryversion.MajorMinor(uint(majorIntroduced), uint(minorIntroduced))
if e.currentVersion.LessThan(verIntroduced) {
return false
}
}
removed, ok := versionedPtr.(removedInterface)
if !ok {
return true
@ -127,7 +158,7 @@ func (e *resourceExpirationEvaluator) ShouldServeForVersion(majorRemoved, minorR
// at this point major and minor are equal, so this API should be removed when the current release GAs.
// If this is an alpha tag, don't remove by default, but allow the option.
// If the cluster-admin has requested serving one more release, allow it.
if e.isAlpha && e.strictRemovedHandlingInAlpha { // don't serve in alpha if we want strict handling
if e.isAlpha && !e.isAlphaZero && e.strictRemovedHandlingInAlpha { // don't serve in alpha.1+ if we want strict handling
return false
}
if e.isAlpha { // alphas are allowed to continue serving expired betas while we clean up the test
@ -150,13 +181,13 @@ type introducedInterface interface {
// removeDeletedKinds inspects the storage map and modifies it in place by removing storage for kinds that have been deleted.
// versionedResourcesStorageMap mirrors the field on APIGroupInfo, it's a map from version to resource to the storage.
func (e *resourceExpirationEvaluator) RemoveDeletedKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage) {
func (e *resourceExpirationEvaluator) removeDeletedKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage) {
versionsToRemove := sets.NewString()
for apiVersion := range sets.StringKeySet(versionedResourcesStorageMap) {
versionToResource := versionedResourcesStorageMap[apiVersion]
resourcesToRemove := sets.NewString()
for resourceName, resourceServingInfo := range versionToResource {
if !e.shouldServe(schema.GroupVersion{Group: groupName, Version: apiVersion}, versioner, resourceServingInfo) {
if !e.isNotRemoved(schema.GroupVersion{Group: groupName, Version: apiVersion}, versioner, resourceServingInfo) {
resourcesToRemove.Insert(resourceName)
}
}
@ -184,6 +215,126 @@ func (e *resourceExpirationEvaluator) RemoveDeletedKinds(groupName string, versi
}
}
// RemoveUnavailableKinds modifies versionedResourcesStorageMap in place in two passes:
// it first removes storage for kinds that have been deleted, and then removes storage for
// kinds that are introduced after the current version.
// The returned error, if any, comes from the second pass.
func (e *resourceExpirationEvaluator) RemoveUnavailableKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage, apiResourceConfigSource serverstorage.APIResourceConfigSource) error {
// Deleted kinds are pruned first, so the introduction check only sees the storage that is left.
e.removeDeletedKinds(groupName, versioner, versionedResourcesStorageMap)
return e.removeUnintroducedKinds(groupName, versioner, versionedResourcesStorageMap, apiResourceConfigSource)
}
// removeUnintroducedKinds prunes, in place, the storage of every kind that is introduced after
// the current version. versionedResourcesStorageMap mirrors the field on APIGroupInfo: a map
// from version to resource to the storage.
//
// Versions of the group are visited from lowest to highest priority so that, for each resource,
// the decision for a higher priority version can take into account whether a lower priority
// version of the same resource is already being kept (emulation forward compatibility).
//
// An error is returned when the per-resource check fails, or when a version that ends up with
// no resources was explicitly enabled through apiResourceConfigSource.
func (e *resourceExpirationEvaluator) removeUnintroducedKinds(groupName string, versioner runtime.ObjectVersioner, versionedResourcesStorageMap map[string]map[string]rest.Storage, apiResourceConfigSource serverstorage.APIResourceConfigSource) error {
	groupVersions := versioner.PrioritizedVersionsForGroup(groupName)
	sort.Slice(groupVersions, func(a, b int) bool {
		return version.CompareKubeAwareVersionStrings(groupVersions[a].Version, groupVersions[b].Version) > 0
	})

	emptiedVersions := sets.NewString()
	keptResources := sets.NewString()
	// Walk from the end to the front, so that the lower priority versions are handled first.
	for idx := len(groupVersions) - 1; idx >= 0; idx-- {
		apiVersion := groupVersions[idx].Version
		storageByResource := versionedResourcesStorageMap[apiVersion]
		if len(storageByResource) == 0 {
			continue
		}

		// Emulation forward compatibility is not applicable to alpha apis, so keeping an alpha
		// resource must not influence the decision for other versions of that resource.
		isAlphaVersion := alphaPattern.MatchString(apiVersion)
		unintroduced := sets.NewString()
		for resourceName, storage := range storageByResource {
			gvr := schema.GroupVersionResource{Group: groupName, Version: apiVersion, Resource: resourceName}
			// keptResources only holds names kept for versions visited earlier, i.e. versions
			// with a lower priority than the one being checked now.
			keep, err := e.shouldServeBasedOnVersionIntroduced(gvr, versioner, storage, apiResourceConfigSource, keptResources.Has(resourceName))
			if err != nil {
				return err
			}
			switch {
			case !keep:
				unintroduced.Insert(resourceName)
			case !isAlphaVersion:
				// Remembered for the following iterations, which check higher priority versions.
				keptResources.Insert(resourceName)
			}
		}

		for resourceName := range storageByResource {
			if !shouldRemoveResourceAndSubresources(unintroduced, resourceName) {
				continue
			}
			klog.V(1).Infof("Removing resource %v.%v.%v because it is introduced after the current version %s per APILifecycle.", resourceName, apiVersion, groupName, e.currentVersion.String())
			// Release the storage before dropping it from the map.
			storageByResource[resourceName].Destroy()
			delete(storageByResource, resourceName)
		}
		versionedResourcesStorageMap[apiVersion] = storageByResource

		if len(storageByResource) == 0 {
			emptiedVersions.Insert(apiVersion)
		}
	}

	// Versions left without any resource are removed entirely, unless the operator explicitly
	// asked for them, in which case the mismatch is reported as an error.
	for _, apiVersion := range emptiedVersions.List() {
		gv := schema.GroupVersion{Group: groupName, Version: apiVersion}
		if apiResourceConfigSource != nil && apiResourceConfigSource.VersionExplicitlyEnabled(gv) {
			return fmt.Errorf(
				"cannot enable version %s in runtime-config because all the resources have been introduced after the current version %s. Consider setting --runtime-config-emulation-forward-compatible=true",
				gv, e.currentVersion)
		}
		klog.V(1).Infof("Removing version %v.%v because it is introduced after the current version %s and because it has no resources per APILifecycle.", apiVersion, groupName, e.currentVersion.String())
		delete(versionedResourcesStorageMap, apiVersion)
	}
	return nil
}
// shouldServeBasedOnVersionIntroduced reports whether the storage for gvr should be kept, based
// on the version at which its kind is introduced.
//
// The introduction version is read from APILifecycleIntroduced on the object obtained by
// converting resourceServingInfo.New() to the target group version; objects that do not
// implement introducedInterface are treated as introduced at 0.0.
//
// A resource introduced at or before the current version is always kept. A resource introduced
// later is kept only when
//   - emulationForwardCompatible is set and lowerPriorityEnabled is true, or
//   - runtimeConfigEmulationForwardCompatible is set and the resource or its group version is
//     explicitly enabled in apiResourceConfigSource.
//
// An error is returned when the conversion fails, or when such a later-introduced resource is
// explicitly enabled in apiResourceConfigSource but is not kept by the rules above.
func (e *resourceExpirationEvaluator) shouldServeBasedOnVersionIntroduced(gvr schema.GroupVersionResource, versioner runtime.ObjectVersioner, resourceServingInfo rest.Storage,
	apiResourceConfigSource serverstorage.APIResourceConfigSource, lowerPriorityEnabled bool) (bool, error) {
	internalObj := resourceServingInfo.New()

	targetGV := gvr.GroupVersion()
	// honor storage that overrides group version (used for things like scale subresources)
	if provider, ok := resourceServingInfo.(rest.GroupVersionKindProvider); ok {
		targetGV = provider.GroupVersionKind(targetGV).GroupVersion()
	}

	versionedObj, err := versioner.ConvertToVersion(internalObj, targetGV)
	if err != nil {
		utilruntime.HandleError(err)
		return false, err
	}

	// Without lifecycle information the resource counts as introduced at 0.0.
	introducedAt := apimachineryversion.MajorMinor(0, 0)
	if lifecycle, ok := versionedObj.(introducedInterface); ok {
		major, minor := lifecycle.APILifecycleIntroduced()
		introducedAt = apimachineryversion.MajorMinor(uint(major), uint(minor))
	}

	switch {
	case e.currentVersion.AtLeast(introducedAt):
		// introduced at or before the current version: always served.
		return true, nil
	case e.emulationForwardCompatible && lowerPriorityEnabled:
		// From here on the resource is introduced after the current version (emulation mode only).
		// A lower priority version of the same resource is enabled, so the higher priority one
		// is enabled as well.
		return true, nil
	case apiResourceConfigSource == nil:
		return false, nil
	}

	// future resources can be explicitly enabled in runtime-config forward compatible mode.
	if e.runtimeConfigEmulationForwardCompatible && (apiResourceConfigSource.ResourceExplicitlyEnabled(gvr) || apiResourceConfigSource.VersionExplicitlyEnabled(gvr.GroupVersion())) {
		return true, nil
	}
	// a future resource explicitly enabled in runtime-config is an error when
	// runtimeConfigEmulationForwardCompatible is false.
	if apiResourceConfigSource.ResourceExplicitlyEnabled(gvr) {
		return false, fmt.Errorf("cannot enable resource %s in runtime-config because it is introduced at %s after the current version %s. Consider setting --runtime-config-emulation-forward-compatible=true",
			gvr, introducedAt, e.currentVersion)
	}
	return false, nil
}
func shouldRemoveResourceAndSubresources(resourcesToRemove sets.String, resourceName string) bool {
for _, resourceToRemove := range resourcesToRemove.List() {
if resourceName == resourceToRemove {

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package server contains the plumbing to create kubernetes-like API server command.
package server // import "k8s.io/apiserver/pkg/server"
package server

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package filters contains all the http handler chain filters which
// are not api related.
package filters // import "k8s.io/apiserver/pkg/server/filters"
package filters

View File

@ -52,8 +52,8 @@ var waitingMark = &requestWatermark{
phase: epmetrics.WaitingPhase,
}
var atomicMutatingExecuting, atomicReadOnlyExecuting int32
var atomicMutatingWaiting, atomicReadOnlyWaiting int32
var atomicMutatingExecuting, atomicReadOnlyExecuting atomic.Int32
var atomicMutatingWaiting, atomicReadOnlyWaiting atomic.Int32
// newInitializationSignal is defined for testing purposes.
var newInitializationSignal = utilflowcontrol.NewInitializationSignal
@ -143,16 +143,16 @@ func (h *priorityAndFairnessHandler) Handle(w http.ResponseWriter, r *http.Reque
isMutatingRequest := !nonMutatingRequestVerbs.Has(requestInfo.Verb)
noteExecutingDelta := func(delta int32) {
if isMutatingRequest {
watermark.recordMutating(int(atomic.AddInt32(&atomicMutatingExecuting, delta)))
watermark.recordMutating(int(atomicMutatingExecuting.Add(delta)))
} else {
watermark.recordReadOnly(int(atomic.AddInt32(&atomicReadOnlyExecuting, delta)))
watermark.recordReadOnly(int(atomicReadOnlyExecuting.Add(delta)))
}
}
noteWaitingDelta := func(delta int32) {
if isMutatingRequest {
waitingMark.recordMutating(int(atomic.AddInt32(&atomicMutatingWaiting, delta)))
waitingMark.recordMutating(int(atomicMutatingWaiting.Add(delta)))
} else {
waitingMark.recordReadOnly(int(atomic.AddInt32(&atomicReadOnlyWaiting, delta)))
waitingMark.recordReadOnly(int(atomicReadOnlyWaiting.Add(delta)))
}
}
queueNote := func(inQueue bool) {

View File

@ -54,8 +54,8 @@ import (
"k8s.io/apiserver/pkg/storageversion"
utilfeature "k8s.io/apiserver/pkg/util/feature"
restclient "k8s.io/client-go/rest"
basecompatibility "k8s.io/component-base/compatibility"
"k8s.io/component-base/featuregate"
utilversion "k8s.io/component-base/version"
"k8s.io/klog/v2"
openapibuilder3 "k8s.io/kube-openapi/pkg/builder3"
openapicommon "k8s.io/kube-openapi/pkg/common"
@ -244,7 +244,18 @@ type GenericAPIServer struct {
// EffectiveVersion determines which apis and features are available
// based on the api/feature lifecycle.
EffectiveVersion utilversion.EffectiveVersion
EffectiveVersion basecompatibility.EffectiveVersion
// EmulationForwardCompatible is an option to implicitly enable all APIs which are introduced after the emulation version and
// have higher priority than APIs of the same group resource enabled at the emulation version.
// If true, all APIs that have higher priority than the APIs(beta+) of the same group resource enabled at the emulation version will be installed.
// This is needed when a controller implementation migrates to newer API versions, for the binary version, and also uses the newer API versions even when emulation version is set.
// Not applicable to alpha APIs.
EmulationForwardCompatible bool
// RuntimeConfigEmulationForwardCompatible is an option to explicitly enable specific APIs introduced after the emulation version through the runtime-config.
// If true, APIs identified by group/version that are enabled in the --runtime-config flag will be installed even if it is introduced after the emulation version. --runtime-config flag values that identify multiple APIs, such as api/all,api/ga,api/beta, are not influenced by this flag and will only enable APIs available at the current emulation version.
// If false, error would be thrown if any GroupVersion or GroupVersionResource explicitly enabled in the --runtime-config flag is introduced after the emulation version.
RuntimeConfigEmulationForwardCompatible bool
// FeatureGate is a way to plumb feature gate through if you have them.
FeatureGate featuregate.FeatureGate
@ -785,28 +796,26 @@ func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *A
}
resourceInfos = append(resourceInfos, r...)
if s.FeatureGate.Enabled(features.AggregatedDiscoveryEndpoint) {
// Aggregated discovery only aggregates resources under /apis
if apiPrefix == APIGroupPrefix {
s.AggregatedDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2.APIVersionDiscovery{
Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
)
} else {
// There is only one group version for legacy resources, priority can be defaulted to 0.
s.AggregatedLegacyDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2.APIVersionDiscovery{
Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
)
}
// Aggregated discovery only aggregates resources under /apis
if apiPrefix == APIGroupPrefix {
s.AggregatedDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2.APIVersionDiscovery{
Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
)
} else {
// There is only one group version for legacy resources, priority can be defaulted to 0.
s.AggregatedLegacyDiscoveryGroupManager.AddGroupVersion(
groupVersion.Group,
apidiscoveryv2.APIVersionDiscovery{
Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent,
Version: groupVersion.Version,
Resources: discoveryAPIResources,
},
)
}
}
@ -844,12 +853,8 @@ func (s *GenericAPIServer) InstallLegacyAPIGroup(apiPrefix string, apiGroupInfo
// Install the version handler.
// Add a handler at /<apiPrefix> to enumerate the supported api versions.
legacyRootAPIHandler := discovery.NewLegacyRootAPIHandler(s.discoveryAddresses, s.Serializer, apiPrefix)
if s.FeatureGate.Enabled(features.AggregatedDiscoveryEndpoint) {
wrapped := discoveryendpoint.WrapAggregatedDiscoveryToHandler(legacyRootAPIHandler, s.AggregatedLegacyDiscoveryGroupManager)
s.Handler.GoRestfulContainer.Add(wrapped.GenerateWebService("/api", metav1.APIVersions{}))
} else {
s.Handler.GoRestfulContainer.Add(legacyRootAPIHandler.WebService())
}
wrapped := discoveryendpoint.WrapAggregatedDiscoveryToHandler(legacyRootAPIHandler, s.AggregatedLegacyDiscoveryGroupManager)
s.Handler.GoRestfulContainer.Add(wrapped.GenerateWebService("/api", metav1.APIVersions{}))
s.registerStorageReadinessCheck("", apiGroupInfo)
return nil
@ -991,8 +996,18 @@ func (s *GenericAPIServer) newAPIGroupVersion(apiGroupInfo *APIGroupInfo, groupV
// NewDefaultAPIGroupInfo returns an APIGroupInfo stubbed with "normal" values
// exposed for easier composition from other packages
func NewDefaultAPIGroupInfo(group string, scheme *runtime.Scheme, parameterCodec runtime.ParameterCodec, codecs serializer.CodecFactory) APIGroupInfo {
opts := []serializer.CodecFactoryOptionsMutator{}
if utilfeature.DefaultFeatureGate.Enabled(features.CBORServingAndStorage) {
codecs = serializer.NewCodecFactory(scheme, serializer.WithSerializer(cbor.NewSerializerInfo))
opts = append(opts, serializer.WithSerializer(cbor.NewSerializerInfo))
}
if utilfeature.DefaultFeatureGate.Enabled(features.StreamingCollectionEncodingToJSON) {
opts = append(opts, serializer.WithStreamingCollectionEncodingToJSON())
}
if utilfeature.DefaultFeatureGate.Enabled(features.StreamingCollectionEncodingToProtobuf) {
opts = append(opts, serializer.WithStreamingCollectionEncodingToProtobuf())
}
if len(opts) != 0 {
codecs = serializer.NewCodecFactory(scheme, opts...)
}
return APIGroupInfo{
PrioritizedVersions: scheme.PrioritizedVersionsForGroup(group),

View File

@ -19,4 +19,4 @@ limitations under the License.
//
// import "k8s.io/apiserver/pkg/server/healthz"
// healthz.InstallHandler(mux)
package healthz // import "k8s.io/apiserver/pkg/server/healthz"
package healthz

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package httplog contains a helper object and functions to maintain a log
// along with an http response.
package httplog // import "k8s.io/apiserver/pkg/server/httplog"
package httplog

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package mux contains abstractions for http multiplexing of APIs.
package mux // import "k8s.io/apiserver/pkg/server/mux"
package mux

View File

@ -95,6 +95,14 @@ func (s *APIEnablementOptions) ApplyTo(c *server.Config, defaultResourceConfig *
}
mergedResourceConfig, err := resourceconfig.MergeAPIResourceConfigs(defaultResourceConfig, s.RuntimeConfig, registry)
if err != nil {
return err
}
// apply emulation forward compatibility to the api enablement if applicable.
if c.EmulationForwardCompatible {
mergedResourceConfig, err = resourceconfig.EmulationForwardCompatibleResourceConfig(mergedResourceConfig, s.RuntimeConfig, registry)
}
c.MergedResourceConfig = mergedResourceConfig
return err

View File

@ -18,4 +18,4 @@ limitations under the License.
// server. It takes a minimal set of dependencies and does not reference
// implementations, in order to ensure it may be reused by multiple components
// (such as CLI commands that wish to generate or validate config).
package options // import "k8s.io/apiserver/pkg/server/options"
package options

View File

@ -27,9 +27,9 @@ import (
"k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/util/compatibility"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
utilversion "k8s.io/component-base/version"
basecompatibility "k8s.io/component-base/compatibility"
"github.com/spf13/pflag"
)
@ -95,22 +95,32 @@ type ServerRunOptions struct {
ShutdownWatchTerminationGracePeriod time.Duration
// ComponentGlobalsRegistry is the registry where the effective versions and feature gates for all components are stored.
ComponentGlobalsRegistry featuregate.ComponentGlobalsRegistry
ComponentGlobalsRegistry basecompatibility.ComponentGlobalsRegistry
// ComponentName is the name under which the server's global variables are registered in the ComponentGlobalsRegistry.
ComponentName string
// EmulationForwardCompatible is an option to implicitly enable all APIs which are introduced after the emulation version and
// have higher priority than APIs of the same group resource enabled at the emulation version.
// If true, all APIs that have higher priority than the APIs(beta+) of the same group resource enabled at the emulation version will be installed.
// This is needed when a controller implementation migrates to newer API versions, for the binary version, and also uses the newer API versions even when emulation version is set.
// Not applicable to alpha APIs.
EmulationForwardCompatible bool
// RuntimeConfigEmulationForwardCompatible is an option to explicitly enable specific APIs introduced after the emulation version through the runtime-config.
// If true, APIs identified by group/version that are enabled in the --runtime-config flag will be installed even if it is introduced after the emulation version. --runtime-config flag values that identify multiple APIs, such as api/all,api/ga,api/beta, are not influenced by this flag and will only enable APIs available at the current emulation version.
// If false, error would be thrown if any GroupVersion or GroupVersionResource explicitly enabled in the --runtime-config flag is introduced after the emulation version.
RuntimeConfigEmulationForwardCompatible bool
}
func NewServerRunOptions() *ServerRunOptions {
if featuregate.DefaultComponentGlobalsRegistry.EffectiveVersionFor(featuregate.DefaultKubeComponent) == nil {
if compatibility.DefaultComponentGlobalsRegistry.EffectiveVersionFor(basecompatibility.DefaultKubeComponent) == nil {
featureGate := utilfeature.DefaultMutableFeatureGate
effectiveVersion := utilversion.DefaultKubeEffectiveVersion()
utilruntime.Must(featuregate.DefaultComponentGlobalsRegistry.Register(featuregate.DefaultKubeComponent, effectiveVersion, featureGate))
effectiveVersion := compatibility.DefaultBuildEffectiveVersion()
utilruntime.Must(compatibility.DefaultComponentGlobalsRegistry.Register(basecompatibility.DefaultKubeComponent, effectiveVersion, featureGate))
}
return NewServerRunOptionsForComponent(featuregate.DefaultKubeComponent, featuregate.DefaultComponentGlobalsRegistry)
return NewServerRunOptionsForComponent(basecompatibility.DefaultKubeComponent, compatibility.DefaultComponentGlobalsRegistry)
}
func NewServerRunOptionsForComponent(componentName string, componentGlobalsRegistry featuregate.ComponentGlobalsRegistry) *ServerRunOptions {
func NewServerRunOptionsForComponent(componentName string, componentGlobalsRegistry basecompatibility.ComponentGlobalsRegistry) *ServerRunOptions {
defaults := server.NewConfig(serializer.CodecFactory{})
return &ServerRunOptions{
MaxRequestsInFlight: defaults.MaxRequestsInFlight,
@ -152,6 +162,8 @@ func (s *ServerRunOptions) ApplyTo(c *server.Config) error {
c.ShutdownWatchTerminationGracePeriod = s.ShutdownWatchTerminationGracePeriod
c.EffectiveVersion = s.ComponentGlobalsRegistry.EffectiveVersionFor(s.ComponentName)
c.FeatureGate = s.ComponentGlobalsRegistry.FeatureGateFor(s.ComponentName)
c.EmulationForwardCompatible = s.EmulationForwardCompatible
c.RuntimeConfigEmulationForwardCompatible = s.RuntimeConfigEmulationForwardCompatible
return nil
}
@ -231,6 +243,17 @@ func (s *ServerRunOptions) Validate() []error {
if errs := s.ComponentGlobalsRegistry.Validate(); len(errs) != 0 {
errors = append(errors, errs...)
}
effectiveVersion := s.ComponentGlobalsRegistry.EffectiveVersionFor(s.ComponentName)
if effectiveVersion == nil {
return errors
}
notEmulationMode := effectiveVersion.BinaryVersion().WithPatch(0).EqualTo(effectiveVersion.EmulationVersion())
if notEmulationMode && s.EmulationForwardCompatible {
errors = append(errors, fmt.Errorf("ServerRunOptions.EmulationForwardCompatible cannot be set to true if the emulation version is the same as the binary version"))
}
if notEmulationMode && s.RuntimeConfigEmulationForwardCompatible {
errors = append(errors, fmt.Errorf("ServerRunOptions.RuntimeConfigEmulationForwardCompatible cannot be set to true if the emulation version is the same as the binary version"))
}
return errors
}
@ -376,6 +399,13 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
"for active watch request(s) to drain during the graceful server shutdown window.")
s.ComponentGlobalsRegistry.AddFlags(fs)
fs.BoolVar(&s.EmulationForwardCompatible, "emulation-forward-compatible", s.EmulationForwardCompatible, ""+
"If true, for any beta+ APIs enabled by default or by --runtime-config at the emulation version, their future versions with higher priority/stability will be auto enabled even if they introduced after the emulation version. "+
"Can only be set to true if the emulation version is lower than the binary version.")
fs.BoolVar(&s.RuntimeConfigEmulationForwardCompatible, "runtime-config-emulation-forward-compatible", s.RuntimeConfigEmulationForwardCompatible, ""+
"If true, APIs identified by group/version that are enabled in the --runtime-config flag will be installed even if it is introduced after the emulation version. "+
"If false, server would fail to start if any APIs identified by group/version that are enabled in the --runtime-config flag are introduced after the emulation version. "+
"Can only be set to true if the emulation version is lower than the binary version.")
}
// Complete fills missing fields with defaults.

View File

@ -18,6 +18,7 @@ package options
import (
"fmt"
"time"
"github.com/google/uuid"
@ -49,9 +50,18 @@ func (s *SecureServingOptionsWithLoopback) ApplyTo(secureServingInfo **server.Se
return nil
}
// Set a validity period of approximately 3 years for the loopback certificate
// to avoid kube-apiserver disruptions due to certificate expiration.
// When this certificate expires, restarting kube-apiserver will automatically
// regenerate a new certificate with fresh validity dates.
maxAge := (3*365 + 1) * 24 * time.Hour
// create self-signed cert+key with the fake server.LoopbackClientServerNameOverride and
// let the server return it when the loopback client connects.
certPem, keyPem, err := certutil.GenerateSelfSignedCertKey(server.LoopbackClientServerNameOverride, nil, nil)
certPem, keyPem, err := certutil.GenerateSelfSignedCertKeyWithOptions(certutil.SelfSignedCertKeyOptions{
Host: server.LoopbackClientServerNameOverride,
MaxAge: maxAge,
})
if err != nil {
return fmt.Errorf("failed to generate self-signed certificate for loopback connection: %v", err)
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package resourceconfig contains the resource config related helper functions.
package resourceconfig // import "k8s.io/apiserver/pkg/server/resourceconfig"
package resourceconfig

View File

@ -19,11 +19,13 @@ package resourceconfig
import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/version"
serverstore "k8s.io/apiserver/pkg/server/storage"
cliflag "k8s.io/component-base/cli/flag"
)
@ -36,6 +38,8 @@ type GroupVersionRegistry interface {
IsVersionRegistered(v schema.GroupVersion) bool
// PrioritizedVersionsAllGroups returns all registered group versions.
PrioritizedVersionsAllGroups() []schema.GroupVersion
// PrioritizedVersionsForGroup returns versions for a single group in priority order
PrioritizedVersionsForGroup(group string) []schema.GroupVersion
}
// MergeResourceEncodingConfigs merges the given defaultResourceConfig with specific GroupVersionResource overrides.
@ -100,7 +104,17 @@ func MergeAPIResourceConfigs(
}
}
}
if err := applyVersionAndResourcePreferences(resourceConfig, overrides, registry); err != nil {
return nil, err
}
return resourceConfig, nil
}
func applyVersionAndResourcePreferences(
resourceConfig *serverstore.ResourceConfig,
overrides cliflag.ConfigurationMap,
registry GroupVersionRegistry,
) error {
type versionEnablementPreference struct {
key string
enabled bool
@ -130,7 +144,7 @@ func MergeAPIResourceConfigs(
groupVersionString := tokens[0] + "/" + tokens[1]
groupVersion, err := schema.ParseGroupVersion(groupVersionString)
if err != nil {
return nil, fmt.Errorf("invalid key %s", key)
return fmt.Errorf("invalid key %s", key)
}
// Exclude group not registered into the registry.
@ -140,11 +154,11 @@ func MergeAPIResourceConfigs(
// Verify that the groupVersion is registered into registry.
if !registry.IsVersionRegistered(groupVersion) {
return nil, fmt.Errorf("group version %s that has not been registered", groupVersion.String())
return fmt.Errorf("group version %s that has not been registered", groupVersion.String())
}
enabled, err := getRuntimeConfigValue(overrides, key, false)
if err != nil {
return nil, err
return err
}
switch len(tokens) {
@ -156,7 +170,7 @@ func MergeAPIResourceConfigs(
})
case 3:
if strings.ToLower(tokens[2]) != tokens[2] {
return nil, fmt.Errorf("invalid key %v: group/version/resource and resource is always lowercase plural, not %q", key, tokens[2])
return fmt.Errorf("invalid key %v: group/version/resource and resource is always lowercase plural, not %q", key, tokens[2])
}
resourcePreferences = append(resourcePreferences, resourceEnablementPreference{
key: key,
@ -170,11 +184,11 @@ func MergeAPIResourceConfigs(
for _, versionPreference := range versionPreferences {
if versionPreference.enabled {
// enable the groupVersion for "group/version=true"
resourceConfig.EnableVersions(versionPreference.groupVersion)
resourceConfig.ExplicitlyEnableVersions(versionPreference.groupVersion)
} else {
// disable the groupVersion only for "group/version=false"
resourceConfig.DisableVersions(versionPreference.groupVersion)
resourceConfig.ExplicitlyDisableVersions(versionPreference.groupVersion)
}
}
@ -182,13 +196,12 @@ func MergeAPIResourceConfigs(
for _, resourcePreference := range resourcePreferences {
if resourcePreference.enabled {
// enable the resource for "group/version/resource=true"
resourceConfig.EnableResources(resourcePreference.groupVersionResource)
resourceConfig.ExplicitlyEnableResources(resourcePreference.groupVersionResource)
} else {
resourceConfig.DisableResources(resourcePreference.groupVersionResource)
resourceConfig.ExplicitlyDisableResources(resourcePreference.groupVersionResource)
}
}
return resourceConfig, nil
return nil
}
func getRuntimeConfigValue(overrides cliflag.ConfigurationMap, apiKey string, defaultValue bool) (bool, error) {
@ -227,3 +240,61 @@ func ParseGroups(resourceConfig cliflag.ConfigurationMap) ([]string, error) {
return groups, nil
}
// EmulationForwardCompatibleResourceConfig returns a fresh ResourceConfig that
// carries over every group/version and resource preference found in
// resourceConfig and, for each enabled non-alpha entry, additionally enables
// the versions of the same group that rank ahead of it (for resources: the
// same resource name in those versions).
//
// The preferences in resourceConfigOverrides are re-applied at the end so that
// an override targeting one of the implicitly enabled versions still wins. A
// non-nil error is returned only when re-applying those overrides fails.
func EmulationForwardCompatibleResourceConfig(
	resourceConfig *serverstore.ResourceConfig,
	resourceConfigOverrides cliflag.ConfigurationMap,
	registry GroupVersionRegistry,
) (*serverstore.ResourceConfig, error) {
	// versionsAhead returns the versions of group that come before ver once the
	// group's versions are ordered with CompareKubeAwareVersionStrings, greatest
	// first. If ver does not appear in that list, the whole list is returned.
	versionsAhead := func(group, ver string) []schema.GroupVersion {
		candidates := registry.PrioritizedVersionsForGroup(group)
		sort.Slice(candidates, func(a, b int) bool {
			return version.CompareKubeAwareVersionStrings(candidates[a].Version, candidates[b].Version) > 0
		})
		for idx := range candidates {
			if candidates[idx].Version == ver {
				return candidates[:idx]
			}
		}
		return candidates
	}

	forwardCompatible := serverstore.NewResourceConfig()

	for groupVersion, isEnabled := range resourceConfig.GroupVersionConfigs {
		forwardCompatible.GroupVersionConfigs[groupVersion] = isEnabled
		// Disabled entries are only copied, and emulation forward compatibility
		// is not applicable to alpha apis.
		if !isEnabled || alphaPattern.MatchString(groupVersion.Version) {
			continue
		}
		// Every version ranked ahead of an enabled version is implicitly enabled.
		for _, ahead := range versionsAhead(groupVersion.Group, groupVersion.Version) {
			forwardCompatible.EnableVersions(ahead)
		}
	}

	for groupVersionResource, isEnabled := range resourceConfig.ResourceConfigs {
		forwardCompatible.ResourceConfigs[groupVersionResource] = isEnabled
		// Same exclusions as for group/versions above.
		if !isEnabled || alphaPattern.MatchString(groupVersionResource.Version) {
			continue
		}
		// The same resource name is implicitly enabled in every version ranked ahead.
		for _, ahead := range versionsAhead(groupVersionResource.Group, groupVersionResource.Version) {
			forwardCompatible.EnableResources(ahead.WithResource(groupVersionResource.Resource))
		}
	}

	// need to reapply the version preferences if there is an override of a higher priority version.
	err := applyVersionAndResourcePreferences(forwardCompatible, resourceConfigOverrides, registry)
	if err != nil {
		return nil, err
	}
	return forwardCompatible, nil
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package routes holds a collection of optional genericapiserver http handlers.
package routes // import "k8s.io/apiserver/pkg/server/routes"
package routes

View File

@ -39,10 +39,10 @@ func (v Version) Install(c *restful.Container) {
// Set up a service to return the git code version.
versionWS := new(restful.WebService)
versionWS.Path("/version")
versionWS.Doc("git code version from which this is built")
versionWS.Doc("get the version information for this server.")
versionWS.Route(
versionWS.GET("/").To(v.handleVersion).
Doc("get the code version").
Doc("get the version information for this server").
Operation("getCodeVersion").
Produces(restful.MIME_JSON).
Consumes(restful.MIME_JSON).

View File

@ -172,33 +172,31 @@ func (s *SecureServingInfo) Serve(handler http.Handler, shutdownTimeout time.Dur
ReadHeaderTimeout: 32 * time.Second, // just shy of requestTimeoutUpperBound
}
// At least 99% of serialized resources in surveyed clusters were smaller than 256kb.
// This should be big enough to accommodate most API POST requests in a single frame,
// and small enough to allow a per connection buffer of this size multiplied by `MaxConcurrentStreams`.
const resourceBody99Percentile = 256 * 1024
http2Options := &http2.Server{
IdleTimeout: 90 * time.Second, // matches http.DefaultTransport keep-alive timeout
}
// shrink the per-stream buffer and max framesize from the 1MB default while still accommodating most API POST requests in a single frame
http2Options.MaxUploadBufferPerStream = resourceBody99Percentile
http2Options.MaxReadFrameSize = resourceBody99Percentile
// use the overridden concurrent streams setting or make the default of 100 explicit so we can size MaxUploadBufferPerConnection appropriately
if s.HTTP2MaxStreamsPerConnection > 0 {
http2Options.MaxConcurrentStreams = uint32(s.HTTP2MaxStreamsPerConnection)
} else {
// match http2.initialMaxConcurrentStreams used by clients
// this makes it so that a malicious client can only open 400 streams before we forcibly close the connection
// https://github.com/golang/net/commit/b225e7ca6dde1ef5a5ae5ce922861bda011cfabd
http2Options.MaxConcurrentStreams = 100
}
// increase the connection buffer size from the 1MB default to handle the specified number of concurrent streams
http2Options.MaxUploadBufferPerConnection = http2Options.MaxUploadBufferPerStream * int32(http2Options.MaxConcurrentStreams)
if !s.DisableHTTP2 {
// At least 99% of serialized resources in surveyed clusters were smaller than 256kb.
// This should be big enough to accommodate most API POST requests in a single frame,
// and small enough to allow a per connection buffer of this size multiplied by `MaxConcurrentStreams`.
const resourceBody99Percentile = 256 * 1024
http2Options := &http2.Server{
IdleTimeout: 90 * time.Second, // matches http.DefaultTransport keep-alive timeout
// shrink the per-stream buffer and max framesize from the 1MB default while still accommodating most API POST requests in a single frame
MaxUploadBufferPerStream: resourceBody99Percentile,
MaxReadFrameSize: resourceBody99Percentile,
}
// use the overridden concurrent streams setting or make the default of 100 explicit so we can size MaxUploadBufferPerConnection appropriately
if s.HTTP2MaxStreamsPerConnection > 0 {
http2Options.MaxConcurrentStreams = uint32(s.HTTP2MaxStreamsPerConnection)
} else {
// match http2.initialMaxConcurrentStreams used by clients
// this makes it so that a malicious client can only open 400 streams before we forcibly close the connection
// https://github.com/golang/net/commit/b225e7ca6dde1ef5a5ae5ce922861bda011cfabd
http2Options.MaxConcurrentStreams = 100
}
// increase the connection buffer size from the 1MB default to handle the specified number of concurrent streams
http2Options.MaxUploadBufferPerConnection = http2Options.MaxUploadBufferPerStream * int32(http2Options.MaxConcurrentStreams)
// apply settings to the server
if err := http2.ConfigureServer(secureServer, http2Options); err != nil {
return nil, nil, fmt.Errorf("error configuring http2: %v", err)
@ -236,8 +234,11 @@ func RunServer(
defer close(serverShutdownCh)
<-stopCh
ctx, cancel := context.WithTimeout(context.Background(), shutDownTimeout)
server.Shutdown(ctx)
cancel()
defer cancel()
err := server.Shutdown(ctx)
if err != nil {
klog.Errorf("Failed to shutdown server: %v", err)
}
}()
go func() {

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package storage contains the plumbing to setup the etcd storage of the apiserver.
package storage // import "k8s.io/apiserver/pkg/server/storage"
package storage

View File

@ -24,17 +24,26 @@ import (
// APIResourceConfigSource is the query interface for API enablement state.
// *ResourceConfig is asserted to implement it.
type APIResourceConfigSource interface {
	// ResourceEnabled reports whether the given group/version/resource is enabled.
	ResourceEnabled(resource schema.GroupVersionResource) bool
	// AnyResourceForGroupEnabled presumably reports whether at least one
	// resource in the named group is enabled — confirm against the implementation.
	AnyResourceForGroupEnabled(group string) bool
	// ResourceExplicitlyEnabled reports whether the given resource carries an
	// explicit "enabled" preference, as opposed to being enabled by default.
	ResourceExplicitlyEnabled(resource schema.GroupVersionResource) bool
	// VersionExplicitlyEnabled reports whether the given group/version carries
	// an explicit "enabled" preference, as opposed to being enabled by default.
	VersionExplicitlyEnabled(version schema.GroupVersion) bool
}
var _ APIResourceConfigSource = &ResourceConfig{}
type ResourceConfig struct {
GroupVersionConfigs map[schema.GroupVersion]bool
ResourceConfigs map[schema.GroupVersionResource]bool
GroupVersionConfigs map[schema.GroupVersion]bool
ResourceConfigs map[schema.GroupVersionResource]bool
ExplicitGroupVersionConfigs map[schema.GroupVersion]bool
ExplicitResourceConfigs map[schema.GroupVersionResource]bool
}
func NewResourceConfig() *ResourceConfig {
return &ResourceConfig{GroupVersionConfigs: map[schema.GroupVersion]bool{}, ResourceConfigs: map[schema.GroupVersionResource]bool{}}
return &ResourceConfig{
GroupVersionConfigs: map[schema.GroupVersion]bool{},
ResourceConfigs: map[schema.GroupVersionResource]bool{},
ExplicitGroupVersionConfigs: map[schema.GroupVersion]bool{},
ExplicitResourceConfigs: map[schema.GroupVersionResource]bool{},
}
}
// DisableMatchingVersions disables all group/versions for which the matcher function returns true.
@ -77,6 +86,7 @@ func (o *ResourceConfig) removeMatchingResourcePreferences(matcher func(gvr sche
}
for _, k := range keysToRemove {
delete(o.ResourceConfigs, k)
delete(o.ExplicitResourceConfigs, k)
}
}
@ -91,6 +101,13 @@ func (o *ResourceConfig) DisableVersions(versions ...schema.GroupVersion) {
}
}
// ExplicitlyDisableVersions records each given group/version as explicitly
// disabled in ExplicitGroupVersionConfigs and then disables it through
// DisableVersions.
func (o *ResourceConfig) ExplicitlyDisableVersions(versions ...schema.GroupVersion) {
	for i := range versions {
		o.ExplicitGroupVersionConfigs[versions[i]] = false
	}
	o.DisableVersions(versions...)
}
// EnableVersions enables all resources in a given groupVersion.
// This will remove any preferences previously set on individual resources.
func (o *ResourceConfig) EnableVersions(versions ...schema.GroupVersion) {
@ -103,10 +120,16 @@ func (o *ResourceConfig) EnableVersions(versions ...schema.GroupVersion) {
}
// ExplicitlyEnableVersions records each given group/version as explicitly
// enabled in ExplicitGroupVersionConfigs and then enables it through
// EnableVersions.
func (o *ResourceConfig) ExplicitlyEnableVersions(versions ...schema.GroupVersion) {
	for i := range versions {
		o.ExplicitGroupVersionConfigs[versions[i]] = true
	}
	o.EnableVersions(versions...)
}
// TODO this must be removed and we enable/disable individual resources.
func (o *ResourceConfig) versionEnabled(version schema.GroupVersion) bool {
enabled, _ := o.GroupVersionConfigs[version]
return enabled
return o.GroupVersionConfigs[version]
}
func (o *ResourceConfig) DisableResources(resources ...schema.GroupVersionResource) {
@ -115,12 +138,26 @@ func (o *ResourceConfig) DisableResources(resources ...schema.GroupVersionResour
}
}
// ExplicitlyDisableResources records each given resource as explicitly
// disabled in ExplicitResourceConfigs and then disables it through
// DisableResources.
func (o *ResourceConfig) ExplicitlyDisableResources(resources ...schema.GroupVersionResource) {
	for i := range resources {
		o.ExplicitResourceConfigs[resources[i]] = false
	}
	o.DisableResources(resources...)
}
// EnableResources sets the per-resource preference of every given resource to
// enabled in ResourceConfigs.
func (o *ResourceConfig) EnableResources(resources ...schema.GroupVersionResource) {
	for i := range resources {
		o.ResourceConfigs[resources[i]] = true
	}
}
// ExplicitlyEnableResources records each given resource as explicitly enabled
// in ExplicitResourceConfigs and then enables it through EnableResources.
func (o *ResourceConfig) ExplicitlyEnableResources(resources ...schema.GroupVersionResource) {
	for i := range resources {
		o.ExplicitResourceConfigs[resources[i]] = true
	}
	o.EnableResources(resources...)
}
func (o *ResourceConfig) ResourceEnabled(resource schema.GroupVersionResource) bool {
// if a resource is explicitly set, that takes priority over the preference of the version.
resourceEnabled, explicitlySet := o.ResourceConfigs[resource]
@ -151,3 +188,19 @@ func (o *ResourceConfig) AnyResourceForGroupEnabled(group string) bool {
return false
}
// ResourceExplicitlyEnabled reports whether resource has an explicit "enabled"
// entry in ExplicitResourceConfigs. A resource with no explicit entry, or with
// an explicit "disabled" entry, yields false.
func (o *ResourceConfig) ResourceExplicitlyEnabled(resource schema.GroupVersionResource) bool {
	// Indexing a map with an absent key yields the zero value (false), which is
	// exactly the answer wanted for "never explicitly set".
	return o.ExplicitResourceConfigs[resource]
}
// VersionExplicitlyEnabled reports whether version has an explicit "enabled"
// entry in ExplicitGroupVersionConfigs. A version with no explicit entry, or
// with an explicit "disabled" entry, yields false.
func (o *ResourceConfig) VersionExplicitlyEnabled(version schema.GroupVersion) bool {
	// Indexing a map with an absent key yields the zero value (false), which is
	// exactly the answer wanted for "never explicitly set".
	return o.ExplicitGroupVersionConfigs[version]
}

View File

@ -22,7 +22,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
apimachineryversion "k8s.io/apimachinery/pkg/util/version"
version "k8s.io/component-base/version"
"k8s.io/apiserver/pkg/util/compatibility"
basecompatibility "k8s.io/component-base/compatibility"
)
type ResourceEncodingConfig interface {
@ -43,7 +44,7 @@ type DefaultResourceEncodingConfig struct {
// resources records the overriding encoding configs for individual resources.
resources map[schema.GroupResource]*OverridingResourceEncoding
scheme *runtime.Scheme
effectiveVersion version.EffectiveVersion
effectiveVersion basecompatibility.EffectiveVersion
}
type OverridingResourceEncoding struct {
@ -54,7 +55,11 @@ type OverridingResourceEncoding struct {
var _ ResourceEncodingConfig = &DefaultResourceEncodingConfig{}
func NewDefaultResourceEncodingConfig(scheme *runtime.Scheme) *DefaultResourceEncodingConfig {
return &DefaultResourceEncodingConfig{resources: map[schema.GroupResource]*OverridingResourceEncoding{}, scheme: scheme, effectiveVersion: version.DefaultKubeEffectiveVersion()}
return NewDefaultResourceEncodingConfigForEffectiveVersion(scheme, compatibility.DefaultComponentGlobalsRegistry.EffectiveVersionFor(basecompatibility.DefaultKubeComponent))
}
// NewDefaultResourceEncodingConfigForEffectiveVersion builds a
// DefaultResourceEncodingConfig for scheme that starts with an empty set of
// per-resource encoding overrides and uses the supplied effectiveVersion.
func NewDefaultResourceEncodingConfigForEffectiveVersion(scheme *runtime.Scheme, effectiveVersion basecompatibility.EffectiveVersion) *DefaultResourceEncodingConfig {
	overrides := map[schema.GroupResource]*OverridingResourceEncoding{}
	return &DefaultResourceEncodingConfig{
		resources:        overrides,
		scheme:           scheme,
		effectiveVersion: effectiveVersion,
	}
}
func (o *DefaultResourceEncodingConfig) SetResourceEncoding(resourceBeingStored schema.GroupResource, externalEncodingVersion, internalVersion schema.GroupVersion) {
@ -64,7 +69,7 @@ func (o *DefaultResourceEncodingConfig) SetResourceEncoding(resourceBeingStored
}
}
func (o *DefaultResourceEncodingConfig) SetEffectiveVersion(effectiveVersion version.EffectiveVersion) {
func (o *DefaultResourceEncodingConfig) SetEffectiveVersion(effectiveVersion basecompatibility.EffectiveVersion) {
o.effectiveVersion = effectiveVersion
}
@ -121,7 +126,7 @@ type replacementInterface interface {
APILifecycleReplacement() schema.GroupVersionKind
}
func emulatedStorageVersion(binaryVersionOfResource schema.GroupVersion, example runtime.Object, effectiveVersion version.EffectiveVersion, scheme *runtime.Scheme) (schema.GroupVersion, error) {
func emulatedStorageVersion(binaryVersionOfResource schema.GroupVersion, example runtime.Object, effectiveVersion basecompatibility.EffectiveVersion, scheme *runtime.Scheme) (schema.GroupVersion, error) {
if example == nil || effectiveVersion == nil {
return binaryVersionOfResource, nil
}
@ -172,7 +177,7 @@ func emulatedStorageVersion(binaryVersionOfResource schema.GroupVersion, example
}
// If it was introduced after current compatibility version, don't use it
// skip the introduced check for test when currentVersion is 0.0 to test all apis
// skip the introduced check for test when current compatibility version is 0.0 to test all apis
if introduced, hasIntroduced := exampleOfGVK.(introducedInterface); hasIntroduced && (compatibilityVersion.Major() > 0 || compatibilityVersion.Minor() > 0) {
// Skip versions that have a replacement.

View File

@ -42,7 +42,9 @@ import (
"k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/delegator"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
"k8s.io/apiserver/pkg/storage/cacher/progress"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/cache"
@ -61,10 +63,16 @@ const (
// storageWatchListPageSize is the cacher's request chunk size of
// initial and resync watch lists to storage.
storageWatchListPageSize = int64(10000)
// DefaultEventFreshDuration is the default time duration of events
// we want to keep.
// We set it to defaultBookmarkFrequency plus epsilon to maximize
// chances that last bookmark was sent within kept history, at the
// same time, minimizing the needed memory usage.
DefaultEventFreshDuration = defaultBookmarkFrequency + 15*time.Second
// defaultBookmarkFrequency defines how frequently watch bookmarks should be sent
// in addition to sending a bookmark right before watch deadline.
//
// NOTE: `DefaultEventFreshDuration` is derived from this value, so changing it also changes the kept event history.
defaultBookmarkFrequency = time.Minute
)
@ -80,6 +88,10 @@ type Config struct {
// and metrics.
GroupResource schema.GroupResource
// EventsHistoryWindow specifies minimum history duration that storage is keeping.
// If lower than DefaultEventFreshDuration, the cache creation will fail.
EventsHistoryWindow time.Duration
// The Cache will be caching objects of a given Type and assumes that they
// are all stored under ResourcePrefix directory in the underlying database.
ResourcePrefix string
@ -329,10 +341,6 @@ type Cacher struct {
expiredBookmarkWatchers []*cacheWatcher
}
func (c *Cacher) RequestWatchProgress(ctx context.Context) error {
return c.storage.RequestWatchProgress(ctx)
}
// NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
// its internal cache and updating its cache in the background based on the
// given configuration.
@ -368,7 +376,7 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
objType := reflect.TypeOf(obj)
cacher := &Cacher{
resourcePrefix: config.ResourcePrefix,
ready: newReady(),
ready: newReady(config.Clock),
storage: config.Storage,
objectType: objType,
groupResource: config.GroupResource,
@ -409,9 +417,15 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
contextMetadata = metadata.New(map[string]string{"source": "cache"})
}
progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock, contextMetadata)
eventFreshDuration := config.EventsHistoryWindow
if eventFreshDuration < DefaultEventFreshDuration {
return nil, fmt.Errorf("config.EventsHistoryWindow (%v) must not be below %v", eventFreshDuration, DefaultEventFreshDuration)
}
progressRequester := progress.NewConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock, contextMetadata)
watchCache := newWatchCache(
config.KeyFunc, cacher.processEvent, config.GetAttrsFunc, config.Versioner, config.Indexers, config.Clock, config.GroupResource, progressRequester)
config.KeyFunc, cacher.processEvent, config.GetAttrsFunc, config.Versioner, config.Indexers,
config.Clock, eventFreshDuration, config.GroupResource, progressRequester)
listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc, contextMetadata)
reflectorName := "storage/cacher.go:" + config.ResourcePrefix
@ -450,85 +464,30 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
}
func (c *Cacher) startCaching(stopChannel <-chan struct{}) {
// The 'usable' lock is always 'RLock'able when it is safe to use the cache.
// It is safe to use the cache after a successful list until a disconnection.
// We start with usable (write) locked. The below OnReplace function will
// unlock it after a successful list. The below defer will then re-lock
// it when this function exits (always due to disconnection), only if
// we actually got a successful list. This cycle will repeat as needed.
successfulList := false
c.watchCache.SetOnReplace(func() {
successfulList = true
c.ready.set(true)
c.ready.setReady()
klog.V(1).Infof("cacher (%v): initialized", c.groupResource.String())
metrics.WatchCacheInitializations.WithLabelValues(c.groupResource.String()).Inc()
})
var err error
defer func() {
if successfulList {
c.ready.set(false)
}
c.ready.setError(err)
}()
c.terminateAllWatchers()
// Note that since onReplace may be not called due to errors, we explicitly
// need to retry it on errors under lock.
// Also note that startCaching is called in a loop, so there's no need
// to have another loop here.
if err := c.reflector.ListAndWatch(stopChannel); err != nil {
err = c.reflector.ListAndWatch(stopChannel)
if err != nil {
klog.Errorf("cacher (%v): unexpected ListAndWatch error: %v; reinitializing...", c.groupResource.String(), err)
}
}
// Versioner implements storage.Interface.
// It returns the Versioner of the underlying storage.
func (c *Cacher) Versioner() storage.Versioner {
return c.storage.Versioner()
}
// Create implements storage.Interface.
// The call is passed unchanged to the underlying storage; the watch cache is
// not consulted here.
func (c *Cacher) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
return c.storage.Create(ctx, key, obj, out, ttl)
}
// Delete implements storage.Interface.
//
// The cached object supplied by the caller (the unnamed runtime.Object
// parameter) is ignored. Instead the current object for key is looked up in
// the watch cache and, when present, a deep copy of it is passed to the
// underlying storage as the suggestion. A lookup error is logged and the
// delete proceeds without a suggestion.
func (c *Cacher) Delete(
	ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions,
	validateDeletion storage.ValidateObjectFunc, _ runtime.Object, opts storage.DeleteOptions) error {
	// A nil suggestion tells the storage layer that no cached object is offered.
	var suggestion runtime.Object

	cached, found, err := c.watchCache.GetByKey(key)
	switch {
	case err != nil:
		klog.Errorf("GetByKey returned error: %v", err)
	case found:
		// Copy before handing over: the resource version is modified when
		// the current object is serialized.
		suggestion = cached.(*storeElement).Object.DeepCopyObject()
	}
	return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, suggestion, opts)
}
// namespacedName pairs a namespace with an object name.
type namespacedName struct {
namespace string
name string
}
// Watch implements storage.Interface.
func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
pred := opts.Predicate
// if the watch-list feature wasn't set and the resourceVersion is unset
// ensure that the rv from which the watch is being served, is the latest
// one. "latest" is ensured by serving the watch from
// the underlying storage.
//
// it should never happen due to our validation but let's just be super-safe here
// and disable sendingInitialEvents when the feature wasn't enabled
if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) && opts.SendInitialEvents != nil {
opts.SendInitialEvents = nil
}
// TODO: we should eventually get rid of this legacy case
if utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion) && opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
return c.storage.Watch(ctx, key, opts)
}
requestedWatchRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
if err != nil {
return nil, err
@ -536,10 +495,11 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
var readyGeneration int
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
var ok bool
readyGeneration, ok = c.ready.checkAndReadGeneration()
if !ok {
return nil, errors.NewTooManyRequests("storage is (re)initializing", 1)
var err error
var downtime time.Duration
readyGeneration, downtime, err = c.ready.checkAndReadGeneration()
if err != nil {
return nil, errors.NewTooManyRequests(err.Error(), calculateRetryAfterForUnreadyCache(downtime))
}
} else {
readyGeneration, err = c.ready.waitAndReadGeneration(ctx)
@ -660,7 +620,7 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
c.Lock()
defer c.Unlock()
if generation, ok := c.ready.checkAndReadGeneration(); generation != readyGeneration || !ok {
if generation, _, err := c.ready.checkAndReadGeneration(); generation != readyGeneration || err != nil {
// We went unready or are already on a different generation.
// Avoid registering and starting the watch as it will have to be
// terminated immediately anyway.
@ -693,58 +653,17 @@ func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions
return watcher, nil
}
// Get implements storage.Interface.
func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
ctx, span := tracing.Start(ctx, "cacher.Get",
attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)),
attribute.String("key", key),
attribute.String("resource-version", opts.ResourceVersion))
defer span.End(500 * time.Millisecond)
if opts.ResourceVersion == "" {
// If resourceVersion is not specified, serve it from underlying
// storage (for backward compatibility).
span.AddEvent("About to Get from underlying storage")
return c.storage.Get(ctx, key, opts, objPtr)
}
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() {
// If Cache is not initialized, delegate Get requests to storage
// as described in https://kep.k8s.io/4568
span.AddEvent("About to Get from underlying storage - cache not initialized")
return c.storage.Get(ctx, key, opts, objPtr)
}
}
// If resourceVersion is specified, serve it from cache.
// It's guaranteed that the returned value is at least that
// fresh as the given resourceVersion.
getRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
if err != nil {
return err
}
// Do not create a trace - it's not for free and there are tons
// of Get requests. We can add it if it will be really needed.
if !utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if getRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
span.AddEvent("About to Get from underlying storage - cache not initialized and no resourceVersion set")
return c.storage.Get(ctx, key, opts, objPtr)
}
if err := c.ready.wait(ctx); err != nil {
return errors.NewServiceUnavailable(err.Error())
}
}
objVal, err := conversion.EnforcePtr(objPtr)
if err != nil {
return err
}
span.AddEvent("About to fetch object from cache")
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(ctx, getRV, key)
if err != nil {
return err
@ -765,28 +684,6 @@ func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, o
return nil
}
// shouldDelegateList reports whether a LIST request with the given options
// has to be served by the underlying storage instead of the watch cache.
//
// NOTICE: Keep in sync with shouldListFromStorage function in
//
//	staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/list_work_estimator.go
func shouldDelegateList(opts storage.ListOptions) bool {
	rv, rvMatch := opts.ResourceVersion, opts.ResourceVersionMatch

	listFromCacheEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
	progressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)

	switch {
	case rv == "" && (!listFromCacheEnabled || !progressSupported):
		// A consistent read goes to storage unless ConsistentListFromCache is
		// enabled and watch progress requests are supported.
		return true
	case len(opts.Predicate.Continue) > 0:
		// Watch cache doesn't support continuations, so serve them from etcd.
		return true
	case rvMatch != "" && rvMatch != metav1.ResourceVersionMatchNotOlderThan:
		// Watch cache only supports ResourceVersionMatchNotOlderThan (default).
		// see https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list
		return true
	}

	// Legacy exact match: a limited list at a specific, non-zero resource
	// version with no explicit match mode.
	return opts.Predicate.Limit > 0 && rvMatch == "" && len(rv) > 0 && rv != "0"
}
// computeListLimit determines whether the cacher should
// apply a limit to an incoming LIST request and returns its value.
//
@ -801,55 +698,27 @@ func computeListLimit(opts storage.ListOptions) int64 {
return opts.Predicate.Limit
}
// shouldDelegateListOnNotReadyCache reports whether a LIST request should be
// sent to storage while the cache is not ready: true only for requests with
// no label selector, no field selector and a positive limit.
func shouldDelegateListOnNotReadyCache(opts storage.ListOptions) bool {
	p := opts.Predicate
	if p.Label != nil && !p.Label.Empty() {
		return false
	}
	if p.Field != nil && !p.Field.Empty() {
		return false
	}
	return p.Limit > 0
}
func (c *Cacher) listItems(ctx context.Context, listRV uint64, key string, pred storage.SelectionPredicate, recursive bool) ([]interface{}, uint64, string, error) {
if !recursive {
func (c *Cacher) listItems(ctx context.Context, listRV uint64, key string, opts storage.ListOptions) (listResp, string, error) {
if !opts.Recursive {
obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(ctx, listRV, key)
if err != nil {
return nil, 0, "", err
return listResp{}, "", err
}
if exists {
return []interface{}{obj}, readResourceVersion, "", nil
return listResp{Items: []interface{}{obj}, ResourceVersion: readResourceVersion}, "", nil
}
return nil, readResourceVersion, "", nil
return listResp{ResourceVersion: readResourceVersion}, "", nil
}
return c.watchCache.WaitUntilFreshAndList(ctx, listRV, key, pred.MatcherIndex(ctx))
return c.watchCache.WaitUntilFreshAndList(ctx, listRV, key, opts)
}
// listResp is the result of listing objects from the watch cache.
type listResp struct {
// Items holds the listed objects; GetList expects each one to be a *storeElement.
Items []interface{}
// ResourceVersion is the resource version reported by the watch cache for this read.
ResourceVersion uint64
}
// GetList implements storage.Interface
func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
recursive := opts.Recursive
resourceVersion := opts.ResourceVersion
pred := opts.Predicate
if shouldDelegateList(opts) {
return c.storage.GetList(ctx, key, opts, listObj)
}
listRV, err := c.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return err
}
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() && shouldDelegateListOnNotReadyCache(opts) {
// If Cacher is not initialized, delegate List requests to storage
// as described in https://kep.k8s.io/4568
return c.storage.GetList(ctx, key, opts, listObj)
}
} else {
if listRV == 0 && !c.ready.check() {
// If Cacher is not yet initialized and we don't require any specific
// minimal resource version, simply forward the request to storage.
return c.storage.GetList(ctx, key, opts, listObj)
}
}
// For recursive lists, we need to make sure the key ended with "/" so that we only
// get children "directories". e.g. if we have key "/a", "/a/b", "/ab", getting keys
// with prefix "/a" will return all three, while with prefix "/a/" will return only
@ -858,13 +727,9 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
if opts.Recursive && !strings.HasSuffix(key, "/") {
preparedKey += "/"
}
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
consistentRead := resourceVersion == "" && utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && requestWatchProgressSupported
if consistentRead {
listRV, err = storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
if err != nil {
return err
}
listRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
if err != nil {
return err
}
ctx, span := tracing.Start(ctx, "cacher.GetList",
@ -873,10 +738,10 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
defer span.End(500 * time.Millisecond)
if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
if !c.ready.check() {
if downtime, err := c.ready.check(); err != nil {
// If Cacher is not initialized, reject List requests
// as described in https://kep.k8s.io/4568
return errors.NewTooManyRequests("storage is (re)initializing", 1)
return errors.NewTooManyRequests(err.Error(), calculateRetryAfterForUnreadyCache(downtime))
}
} else {
if err := c.ready.wait(ctx); err != nil {
@ -898,26 +763,11 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
}
objs, readResourceVersion, indexUsed, err := c.listItems(ctx, listRV, preparedKey, pred, recursive)
success := "true"
fallback := "false"
resp, indexUsed, err := c.listItems(ctx, listRV, preparedKey, opts)
if err != nil {
if consistentRead {
if storage.IsTooLargeResourceVersion(err) {
fallback = "true"
err = c.storage.GetList(ctx, key, opts, listObj)
}
if err != nil {
success = "false"
}
metrics.ConsistentReadTotal.WithLabelValues(c.resourcePrefix, success, fallback).Add(1)
}
return err
}
if consistentRead {
metrics.ConsistentReadTotal.WithLabelValues(c.resourcePrefix, success, fallback).Add(1)
}
span.AddEvent("Listed items from cache", attribute.Int("count", len(objs)))
span.AddEvent("Listed items from cache", attribute.Int("count", len(resp.Items)))
// store pointer of eligible objects,
// Why not directly put object in the items of listObj?
// the elements in ListObject are Struct type, making slice will bring excessive memory consumption.
@ -926,17 +776,17 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
var lastSelectedObjectKey string
var hasMoreListItems bool
limit := computeListLimit(opts)
for i, obj := range objs {
for i, obj := range resp.Items {
elem, ok := obj.(*storeElement)
if !ok {
return fmt.Errorf("non *storeElement returned from storage: %v", obj)
}
if pred.MatchesObjectAttributes(elem.Labels, elem.Fields) {
if opts.Predicate.MatchesObjectAttributes(elem.Labels, elem.Fields) {
selectedObjects = append(selectedObjects, elem.Object)
lastSelectedObjectKey = elem.Key
}
if limit > 0 && int64(len(selectedObjects)) >= limit {
hasMoreListItems = i < len(objs)-1
hasMoreListItems = i < len(resp.Items)-1
break
}
}
@ -953,47 +803,16 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio
}
span.AddEvent("Filtered items", attribute.Int("count", listVal.Len()))
if c.versioner != nil {
continueValue, remainingItemCount, err := storage.PrepareContinueToken(lastSelectedObjectKey, key, int64(readResourceVersion), int64(len(objs)), hasMoreListItems, opts)
continueValue, remainingItemCount, err := storage.PrepareContinueToken(lastSelectedObjectKey, key, int64(resp.ResourceVersion), int64(len(resp.Items)), hasMoreListItems, opts)
if err != nil {
return err
}
if err = c.versioner.UpdateList(listObj, readResourceVersion, continueValue, remainingItemCount); err != nil {
if err = c.versioner.UpdateList(listObj, resp.ResourceVersion, continueValue, remainingItemCount); err != nil {
return err
}
}
metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(objs), listVal.Len())
return nil
}
// GuaranteedUpdate implements storage.Interface.
//
// The cached object supplied by the caller (the unnamed runtime.Object
// parameter) is ignored. The current object for key is looked up in the watch
// cache and, when present, a deep copy of it is passed to the underlying
// storage as the suggestion. A lookup error is logged and the update proceeds
// without a suggestion.
func (c *Cacher) GuaranteedUpdate(
	ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
	preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error {
	// A nil suggestion tells the storage layer that no cached object is offered.
	var suggestion runtime.Object

	cached, found, err := c.watchCache.GetByKey(key)
	switch {
	case err != nil:
		klog.Errorf("GetByKey returned error: %v", err)
	case found:
		// Copy before handing over: the resource version is modified when
		// the current object is serialized.
		suggestion = cached.(*storeElement).Object.DeepCopyObject()
	}
	return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, suggestion)
}
// Count implements storage.Interface.
// The count is obtained from the underlying storage, not from the cache.
func (c *Cacher) Count(pathPrefix string) (int64, error) {
return c.storage.Count(pathPrefix)
}
// ReadinessCheck implements storage.Interface.
func (c *Cacher) ReadinessCheck() error {
if !c.ready.check() {
return storage.ErrStorageNotReady
}
metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(resp.Items), listVal.Len())
return nil
}
@ -1420,7 +1239,7 @@ func (c *Cacher) getWatchCacheResourceVersion(ctx context.Context, parsedWatchRe
if !utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion) && opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
return 0, nil
}
rv, err := storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
rv, err := c.storage.GetCurrentResourceVersion(ctx)
return rv, err
}
@ -1473,6 +1292,11 @@ func (c *Cacher) setInitialEventsEndBookmarkIfRequested(cacheInterval *watchCach
}
}
// Ready reports whether the cacher's readiness check currently passes, i.e.
// c.ready.check returns no error.
func (c *Cacher) Ready() bool {
	if _, err := c.ready.check(); err != nil {
		return false
	}
	return true
}
// errWatcher implements watch.Interface to return a single error
type errWatcher struct {
result chan watch.Event
@ -1503,6 +1327,55 @@ func newErrWatcher(err error) *errWatcher {
return watcher
}
// ShouldDelegateExactRV decides whether a LIST at the exact resourceVersion
// must be served by storage. It returns an error only when resourceVersion
// cannot be parsed.
func (c *Cacher) ShouldDelegateExactRV(resourceVersion string, recursive bool) (delegator.Result, error) {
	switch {
	case !recursive:
		// Non-recursive requests are not supported until exact RV is
		// implemented for WaitUntilFreshAndGet.
		return delegator.Result{ShouldDelegate: true}, nil
	case c.watchCache.snapshots == nil:
		// Without snapshots the cache cannot serve an exact revision.
		return delegator.Result{ShouldDelegate: true}, nil
	}

	exactRV, err := c.versioner.ParseResourceVersion(resourceVersion)
	if err != nil {
		return delegator.Result{}, err
	}
	return c.shouldDelegateExactRV(exactRV)
}
// ShouldDelegateContinue decides whether a LIST carrying continueToken must be
// served by storage. It returns an error only when the token cannot be decoded.
func (c *Cacher) ShouldDelegateContinue(continueToken string, recursive bool) (delegator.Result, error) {
	// Non-recursive requests are not supported until exact RV is implemented
	// for WaitUntilFreshAndGet; the same applies when there are no snapshots.
	if !recursive || c.watchCache.snapshots == nil {
		return delegator.Result{ShouldDelegate: true}, nil
	}

	_, tokenRV, err := storage.DecodeContinue(continueToken, c.resourcePrefix)
	if err != nil {
		return delegator.Result{}, err
	}
	if tokenRV <= 0 {
		// A continue token without a positive RV is treated as a consistent read.
		return c.ShouldDelegateConsistentRead()
	}
	return c.shouldDelegateExactRV(uint64(tokenRV))
}
// shouldDelegateExactRV decides whether a request for exactly revision rv has
// to go to storage. The returned error is always nil.
func (c *Cacher) shouldDelegateExactRV(rv uint64) (delegator.Result, error) {
	var delegate bool
	if c.watchCache.notFresh(rv) {
		// Exact requests on a future revision require support for consistent
		// reads, but are not consistent reads by themselves.
		delegate = !delegator.ConsistentReadSupported()
	} else {
		// Otherwise the cache can serve only if it holds a snapshot at or
		// below the requested revision.
		_, haveSnapshot := c.watchCache.snapshots.GetLessOrEqual(rv)
		delegate = !haveSnapshot
	}
	return delegator.Result{ShouldDelegate: delegate}, nil
}
// ShouldDelegateConsistentRead marks the request as a consistent read and
// delegates it to storage when consistent reads from the cache are not
// supported. The returned error is always nil.
func (c *Cacher) ShouldDelegateConsistentRead() (delegator.Result, error) {
	result := delegator.Result{ConsistentRead: true}
	result.ShouldDelegate = !delegator.ConsistentReadSupported()
	return result, nil
}
// Implements watch.Interface.
func (c *errWatcher) ResultChan() <-chan watch.Event {
return c.result

View File

@ -0,0 +1,437 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
import (
"context"
"fmt"
"hash"
"hash/fnv"
"os"
"strconv"
"sync"
"time"
"go.opentelemetry.io/otel/attribute"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/audit"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/delegator"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/tracing"
"k8s.io/klog/v2"
)
// ConsistencyCheckPeriod is the period of checking consistency between etcd
// and cache. 5 minutes were proposed to match the default compaction period.
// It is an order of magnitude higher than the List latency SLO (30 seconds)
// and timeout (1 minute).
var ConsistencyCheckPeriod = 5 * time.Minute

// ConsistencyCheckerEnabled enables the consistency checking mechanism for
// cache. It is populated at package initialization from the
// KUBE_WATCHCACHE_CONSISTENCY_CHECKER environment variable.
var ConsistencyCheckerEnabled bool

func init() {
	// The parse error is deliberately ignored: ParseBool yields false
	// alongside any error, so an unset or malformed variable leaves the
	// checker disabled.
	enabled, _ := strconv.ParseBool(os.Getenv("KUBE_WATCHCACHE_CONSISTENCY_CHECKER"))
	ConsistencyCheckerEnabled = enabled
}
// NewCacheDelegator builds a CacheDelegator over the given cacher and
// underlying storage. When ConsistencyCheckerEnabled is set it also creates a
// consistency checker and runs it in a goroutine that ends once Stop closes
// the delegator's stop channel.
func NewCacheDelegator(cacher *Cacher, storage storage.Interface) *CacheDelegator {
	cd := &CacheDelegator{
		cacher:  cacher,
		storage: storage,
		stopCh:  make(chan struct{}),
	}
	if !ConsistencyCheckerEnabled {
		return cd
	}

	cd.checker = newConsistencyChecker(cacher.resourcePrefix, cacher.newListFunc, cacher, storage)
	// Register the goroutine before starting it so Stop can wait for it.
	cd.wg.Add(1)
	go func() {
		defer cd.wg.Done()
		cd.checker.startChecking(cd.stopCh)
	}()
	return cd
}
// CacheDelegator implements storage.Interface on top of a Cacher and the
// underlying storage, choosing per request which of the two serves it.
type CacheDelegator struct {
// cacher is the watch cache used for requests that can be served from memory.
cacher *Cacher
// storage is the underlying storage that requests are delegated to.
storage storage.Interface
// checker is set only when ConsistencyCheckerEnabled was true at construction.
checker *consistencyChecker
// wg tracks the consistency checker goroutine so Stop can wait for it.
wg sync.WaitGroup
// stopOnce ensures stopCh is closed at most once.
stopOnce sync.Once
// stopCh is closed by Stop to end the consistency checker goroutine.
stopCh chan struct{}
}
// Compile-time assertion that *CacheDelegator satisfies storage.Interface.
var _ storage.Interface = (*CacheDelegator)(nil)
// Versioner implements storage.Interface.
// It returns the Versioner of the underlying storage.
func (c *CacheDelegator) Versioner() storage.Versioner {
return c.storage.Versioner()
}
// Create implements storage.Interface.
// The call is passed unchanged to the underlying storage.
func (c *CacheDelegator) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
return c.storage.Create(ctx, key, obj, out, ttl)
}
// GetCurrentResourceVersion returns the current resource version as reported
// by the underlying storage.
func (c *CacheDelegator) GetCurrentResourceVersion(ctx context.Context) (uint64, error) {
return c.storage.GetCurrentResourceVersion(ctx)
}
// Delete implements storage.Interface.
//
// cachedExistingObject is ignored. The current object for key is looked up in
// the cacher's watch cache and, when present, a deep copy of it is passed to
// the underlying storage as the suggestion. A lookup error is logged and the
// delete proceeds without a suggestion.
func (c *CacheDelegator) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions, validateDeletion storage.ValidateObjectFunc, cachedExistingObject runtime.Object, opts storage.DeleteOptions) error {
	// A nil suggestion tells the storage layer that no cached object is offered.
	var suggestion runtime.Object

	cached, found, err := c.cacher.watchCache.GetByKey(key)
	switch {
	case err != nil:
		klog.Errorf("GetByKey returned error: %v", err)
	case found:
		// Copy before handing over: the resource version is modified when
		// the current object is serialized.
		suggestion = cached.(*storeElement).Object.DeepCopyObject()
	}
	return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, suggestion, opts)
}
// Watch implements storage.Interface. The legacy case of a watch without a
// resourceVersion is served by the underlying storage; every other watch is
// served by the cacher.
func (c *CacheDelegator) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
	// It should never happen due to our validation, but to be super-safe
	// sendInitialEvents is disabled when the WatchList feature is off.
	watchListEnabled := utilfeature.DefaultFeatureGate.Enabled(features.WatchList)
	if !watchListEnabled && opts.SendInitialEvents != nil {
		opts.SendInitialEvents = nil
	}

	// If the watch-list feature wasn't requested and the resourceVersion is
	// unset, the watch must start from the latest RV; that is ensured by
	// serving it from the underlying storage.
	// TODO: we should eventually get rid of this legacy case
	legacyWatchFromStorage := utilfeature.DefaultFeatureGate.Enabled(features.WatchFromStorageWithoutResourceVersion) &&
		opts.SendInitialEvents == nil &&
		opts.ResourceVersion == ""
	if legacyWatchFromStorage {
		return c.storage.Watch(ctx, key, opts)
	}
	return c.cacher.Watch(ctx, key, opts)
}
// Get implements storage.Interface.
//
// Requests without a resourceVersion go to the underlying storage. With
// ResilientWatchCacheInitialization enabled, requests also go to storage while
// the cache is not ready. With the feature disabled, a request at RV 0 goes to
// storage while the cache is not ready, and any other request waits for
// readiness (a failed wait becomes a ServiceUnavailable error). Everything
// else is served by the cacher.
func (c *CacheDelegator) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
	ctx, span := tracing.Start(ctx, "cacher.Get",
		attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)),
		attribute.String("key", key),
		attribute.String("resource-version", opts.ResourceVersion))
	defer span.End(500 * time.Millisecond)

	// No resourceVersion: serve from underlying storage (backward compatibility).
	if opts.ResourceVersion == "" {
		span.AddEvent("About to Get from underlying storage")
		return c.storage.Get(ctx, key, opts, objPtr)
	}

	// Cache not initialized: delegate to storage as described in
	// https://kep.k8s.io/4568
	if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) && !c.cacher.Ready() {
		span.AddEvent("About to Get from underlying storage - cache not initialized")
		return c.storage.Get(ctx, key, opts, objPtr)
	}

	// A resourceVersion was given, so the cache serves the request with a
	// value at least as fresh as that version.
	getRV, err := c.cacher.versioner.ParseResourceVersion(opts.ResourceVersion)
	if err != nil {
		return err
	}

	if !utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
		// Cache not yet initialized and no specific minimal resource version
		// required: simply forward the request to storage.
		if getRV == 0 && !c.cacher.Ready() {
			return c.storage.Get(ctx, key, opts, objPtr)
		}
		if waitErr := c.cacher.ready.wait(ctx); waitErr != nil {
			return errors.NewServiceUnavailable(waitErr.Error())
		}
	}

	span.AddEvent("About to fetch object from cache")
	return c.cacher.Get(ctx, key, opts, objPtr)
}
// GetList implements storage.Interface.
//
// After validating opts, the request is sent to the underlying storage when
// delegator.ShouldDelegateList says so, or when the cache is not ready and the
// request qualifies for delegation. Otherwise it is served by the cacher. For
// a consistent read the current resource version is first fetched from
// storage and used as the list's resourceVersion.
//
// If the cacher fails with a ResourceExpired error the request is retried
// against storage. If a consistent read fails because the resource version is
// too large for the cache, the request falls back to storage with the
// resourceVersion reset. Every consistent read that reaches the cacher is
// counted in metrics.ConsistentReadTotal, labelled with success and fallback.
func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
	if _, _, err := storage.ValidateListOptions(c.cacher.resourcePrefix, c.cacher.versioner, opts); err != nil {
		return err
	}
	result, err := delegator.ShouldDelegateList(opts, c.cacher)
	if err != nil {
		return err
	}
	if result.ShouldDelegate {
		return c.storage.GetList(ctx, key, opts, listObj)
	}

	listRV, err := c.cacher.versioner.ParseResourceVersion(opts.ResourceVersion)
	if err != nil {
		return err
	}
	if utilfeature.DefaultFeatureGate.Enabled(features.ResilientWatchCacheInitialization) {
		if !c.cacher.Ready() && shouldDelegateListOnNotReadyCache(opts) {
			// If Cacher is not initialized, delegate List requests to storage
			// as described in https://kep.k8s.io/4568
			return c.storage.GetList(ctx, key, opts, listObj)
		}
	} else if listRV == 0 && !c.cacher.Ready() {
		// If Cacher is not yet initialized and we don't require any specific
		// minimal resource version, simply forward the request to storage.
		return c.storage.GetList(ctx, key, opts, listObj)
	}

	if result.ConsistentRead {
		listRV, err = c.storage.GetCurrentResourceVersion(ctx)
		if err != nil {
			return err
		}
		// Setting resource version for consistent read in cache based on
		// current ResourceVersion in etcd. FormatUint keeps the full uint64
		// range instead of narrowing through int64.
		opts.ResourceVersion = strconv.FormatUint(listRV, 10)
	}

	err = c.cacher.GetList(ctx, key, opts, listObj)
	if err != nil && errors.IsResourceExpired(err) {
		return c.storage.GetList(ctx, key, opts, listObj)
	}
	if !result.ConsistentRead {
		return err
	}

	// Consistent read: record the outcome, falling back to storage when the
	// cache could not catch up to the requested resource version.
	success, fallback := "true", "false"
	if err != nil && storage.IsTooLargeResourceVersion(err) {
		fallback = "true"
		// Reset resourceVersion during fallback from consistent read.
		opts.ResourceVersion = ""
		err = c.storage.GetList(ctx, key, opts, listObj)
	}
	if err != nil {
		success = "false"
	}
	metrics.ConsistentReadTotal.WithLabelValues(c.cacher.resourcePrefix, success, fallback).Add(1)
	return err
}
// shouldDelegateListOnNotReadyCache reports whether a LIST request should be
// sent to storage while the cache is not ready: true only for requests with
// no label selector, no field selector and a positive limit.
func shouldDelegateListOnNotReadyCache(opts storage.ListOptions) bool {
	p := opts.Predicate
	if p.Label != nil && !p.Label.Empty() {
		return false
	}
	if p.Field != nil && !p.Field.Empty() {
		return false
	}
	return p.Limit > 0
}
// GuaranteedUpdate implements storage.Interface.
//
// cachedExistingObject is ignored. The current object for key is looked up in
// the cacher's watch cache and, when present, a deep copy of it is passed to
// the underlying storage as the suggestion. A lookup error is logged and the
// update proceeds without a suggestion.
func (c *CacheDelegator) GuaranteedUpdate(ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool, preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, cachedExistingObject runtime.Object) error {
	// A nil suggestion tells the storage layer that no cached object is offered.
	var suggestion runtime.Object

	cached, found, err := c.cacher.watchCache.GetByKey(key)
	switch {
	case err != nil:
		klog.Errorf("GetByKey returned error: %v", err)
	case found:
		// Copy before handing over: the resource version is modified when
		// the current object is serialized.
		suggestion = cached.(*storeElement).Object.DeepCopyObject()
	}
	return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, suggestion)
}
// Count implements storage.Interface.
// The count is obtained from the underlying storage, not from the cache.
func (c *CacheDelegator) Count(pathPrefix string) (int64, error) {
return c.storage.Count(pathPrefix)
}
// ReadinessCheck implements storage.Interface. It returns
// storage.ErrStorageNotReady while the cacher is not ready and nil otherwise.
func (c *CacheDelegator) ReadinessCheck() error {
	if c.cacher.Ready() {
		return nil
	}
	return storage.ErrStorageNotReady
}
// RequestWatchProgress forwards the watch progress request to the underlying
// storage and returns whatever error that call reports.
func (c *CacheDelegator) RequestWatchProgress(ctx context.Context) error {
return c.storage.RequestWatchProgress(ctx)
}
// Stop closes the stop channel (at most once, so repeated calls are safe) and
// then blocks until the goroutines tracked by the delegator's WaitGroup have
// finished.
func (c *CacheDelegator) Stop() {
	c.stopOnce.Do(func() { close(c.stopCh) })
	c.wg.Wait()
}
// newConsistencyChecker returns a checker that compares the contents of
// cacher with those of etcd for the resources stored under resourcePrefix.
// newListFunc must produce an empty list object for GetList to fill.
func newConsistencyChecker(resourcePrefix string, newListFunc func() runtime.Object, cacher getListerReady, etcd getLister) *consistencyChecker {
	checker := new(consistencyChecker)
	checker.resourcePrefix = resourcePrefix
	checker.newListFunc = newListFunc
	checker.cacher = cacher
	checker.etcd = etcd
	return checker
}
// consistencyChecker compares a digest of the objects listed from the cache
// with a digest of the objects listed from etcd.
type consistencyChecker struct {
// resourcePrefix is the key listed from both stores; it also labels logs and metrics.
resourcePrefix string
// newListFunc creates the empty list object that GetList fills in.
newListFunc func() runtime.Object
// cacher is the cache side of the comparison; it must be ready before a check runs.
cacher getListerReady
// etcd is the storage side of the comparison.
etcd getLister
}
// getListerReady is a getLister that can also report whether it is ready.
type getListerReady interface {
getLister
Ready() bool
}
// getLister is the subset of storage behavior the consistency checker needs:
// listing the objects under a key into listObj.
type getLister interface {
GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error
}
// startChecking runs a consistency check every ConsistencyCheckPeriod until
// stopCh is closed. The first check happens after one full period because
// polling is started with immediate=false. It blocks until polling ends, so
// callers run it in its own goroutine.
//
// The receiver is a pointer, like every other consistencyChecker method, so
// the checker is not copied on each call.
func (c *consistencyChecker) startChecking(stopCh <-chan struct{}) {
	err := wait.PollUntilContextCancel(wait.ContextForChannel(stopCh), ConsistencyCheckPeriod, false, func(ctx context.Context) (done bool, err error) {
		c.check(ctx)
		// Never report completion: only closing stopCh ends the polling.
		return false, nil
	})
	if err != nil {
		klog.InfoS("Cache consistency check exiting", "resource", c.resourcePrefix, "err", err)
	}
}
// check performs one comparison of the cache and etcd digests and records the
// outcome ("error", "success" or "failure") in
// metrics.StorageConsistencyCheckTotal. A digest mismatch is logged and then
// escalated to a panic.
func (c *consistencyChecker) check(ctx context.Context) {
	digests, err := c.calculateDigests(ctx)
	if err != nil {
		klog.ErrorS(err, "Cache consistency check error", "resource", c.resourcePrefix)
		metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "error").Inc()
		return
	}

	if digests.CacheDigest == digests.EtcdDigest {
		klog.V(3).InfoS("Cache consistency check passed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "digest", digests.CacheDigest)
		metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "success").Inc()
		return
	}

	klog.ErrorS(nil, "Cache consistency check failed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "etcdDigest", digests.EtcdDigest, "cacheDigest", digests.CacheDigest)
	metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "failure").Inc()
	// Panic on failure: this internal consistency checking is enabled only by
	// environment variable.
	panic(fmt.Sprintf("Cache consistency check failed, resource: %q, resourceVersion: %q, etcdDigest: %q, cacheDigest: %q", c.resourcePrefix, digests.ResourceVersion, digests.EtcdDigest, digests.CacheDigest))
}
// calculateDigests lists the resource from the cache and from etcd and returns
// the digest of each list together with the resource version they were taken at.
//
// The cache is listed first (resourceVersion "0", NotOlderThan); the resource
// version it returns is then requested from etcd with an Exact match so both
// digests describe the same revision. An error is returned when the cache is
// not ready or when either list fails.
func (c *consistencyChecker) calculateDigests(ctx context.Context) (*storageDigest, error) {
	if ready := c.cacher.Ready(); !ready {
		return nil, fmt.Errorf("cache is not ready")
	}

	cacheOpts := storage.ListOptions{
		ResourceVersion:      "0",
		ResourceVersionMatch: metav1.ResourceVersionMatchNotOlderThan,
		Predicate:            storage.Everything,
		Recursive:            true,
	}
	cacheDigest, resourceVersion, err := c.calculateStoreDigest(ctx, c.cacher, cacheOpts)
	if err != nil {
		return nil, fmt.Errorf("failed calculating cache digest: %w", err)
	}

	// Pin etcd to exactly the revision the cache answered with.
	etcdOpts := storage.ListOptions{
		ResourceVersion:      resourceVersion,
		ResourceVersionMatch: metav1.ResourceVersionMatchExact,
		Predicate:            storage.Everything,
		Recursive:            true,
	}
	etcdDigest, _, err := c.calculateStoreDigest(ctx, c.etcd, etcdOpts)
	if err != nil {
		return nil, fmt.Errorf("failed calculating etcd digest: %w", err)
	}

	result := &storageDigest{
		ResourceVersion: resourceVersion,
		CacheDigest:     cacheDigest,
		EtcdDigest:      etcdDigest,
	}
	return result, nil
}
// storageDigest is the result of one consistency comparison.
type storageDigest struct {
	// ResourceVersion is the list resource version returned by the cache and
	// requested from etcd.
	ResourceVersion string
	// CacheDigest is the digest string computed from the cache's list.
	CacheDigest string
	// EtcdDigest is the digest string computed from etcd's list.
	EtcdDigest string
}
// calculateStoreDigest lists c.resourcePrefix from store using opts and
// returns the digest of the resulting list along with the list's resource
// version. On any error both strings are empty.
func (c *consistencyChecker) calculateStoreDigest(ctx context.Context, store getLister, opts storage.ListOptions) (digest, rv string, err error) {
	// TODO: Implement pagination
	listObj := c.newListFunc()
	if err = store.GetList(ctx, c.resourcePrefix, opts, listObj); err != nil {
		return "", "", err
	}

	if digest, err = listDigest(listObj); err != nil {
		return "", "", err
	}

	accessor, err := meta.ListAccessor(listObj)
	if err != nil {
		return "", "", err
	}
	rv = accessor.GetResourceVersion()
	return digest, rv, nil
}
// listDigest folds every item of list, in iteration order, into a 64-bit FNV
// hash and returns the hash formatted as lowercase hexadecimal. Any error from
// iterating the list or accessing an item's metadata is returned unchanged.
func listDigest(list runtime.Object) (string, error) {
	hasher := fnv.New64()

	visit := func(item runtime.Object) error {
		accessor, err := meta.Accessor(item)
		if err != nil {
			return err
		}
		return addObjectToDigest(hasher, accessor)
	}

	if err := meta.EachListItem(list, visit); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", hasher.Sum64()), nil
}
// addObjectToDigest feeds "<namespace>/<name>/<resourceVersion>" of objectMeta
// into h, one segment at a time, and stops at the first write error.
func addObjectToDigest(h hash.Hash64, objectMeta metav1.Object) error {
	segments := [...]string{
		objectMeta.GetNamespace(),
		"/",
		objectMeta.GetName(),
		"/",
		objectMeta.GetResourceVersion(),
	}
	for _, segment := range segments {
		if _, err := h.Write([]byte(segment)); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -0,0 +1,113 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package delegator
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
utilfeature "k8s.io/apiserver/pkg/util/feature"
)
// ShouldDelegateListMeta is a convenience wrapper around ShouldDelegateList for
// callers that only have metav1.ListOptions. It copies the resource version,
// match rule, continue token and limit into storage.ListOptions and always
// treats the request as recursive.
func ShouldDelegateListMeta(opts *metav1.ListOptions, cache Helper) (Result, error) {
	predicate := storage.SelectionPredicate{
		Limit:    opts.Limit,
		Continue: opts.Continue,
	}
	listOpts := storage.ListOptions{
		ResourceVersion:      opts.ResourceVersion,
		ResourceVersionMatch: opts.ResourceVersionMatch,
		Predicate:            predicate,
		Recursive:            true,
	}
	return ShouldDelegateList(listOpts, cache)
}
// ShouldDelegateList decides whether a LIST request described by opts has to
// be delegated to etcd or can be answered by the cache.
//
// Decisions that depend on what the cache is able to serve (exact resource
// version, continuation, consistent read) are forwarded to cache. An
// unrecognized ResourceVersionMatch value is always delegated.
//
// See https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list
func ShouldDelegateList(opts storage.ListOptions, cache Helper) (Result, error) {
	serveFromCache := Result{ShouldDelegate: false}

	switch opts.ResourceVersionMatch {
	case metav1.ResourceVersionMatchExact:
		return cache.ShouldDelegateExactRV(opts.ResourceVersion, opts.Recursive)
	case metav1.ResourceVersionMatchNotOlderThan:
		return serveFromCache, nil
	case "":
		// No explicit match rule: decided below from the remaining options.
	default:
		return Result{ShouldDelegate: true}, nil
	}

	rv := opts.ResourceVersion
	switch {
	case len(opts.Predicate.Continue) > 0:
		// Continue
		return cache.ShouldDelegateContinue(opts.Predicate.Continue, opts.Recursive)
	case opts.Predicate.Limit > 0 && len(rv) > 0 && rv != "0":
		// Legacy exact match
		return cache.ShouldDelegateExactRV(rv, opts.Recursive)
	case rv == "":
		// Consistent Read
		return cache.ShouldDelegateConsistentRead()
	default:
		return serveFromCache, nil
	}
}
// Helper answers the delegation questions whose outcome depends on what the
// cache is able to serve. ShouldDelegateList consults it for exact resource
// version, continuation and consistent-read requests.
type Helper interface {
	// ShouldDelegateExactRV decides for a request targeting exactly resource version rv.
	ShouldDelegateExactRV(rv string, recursive bool) (Result, error)
	// ShouldDelegateContinue decides for a request carrying a continue token.
	ShouldDelegateContinue(continueToken string, recursive bool) (Result, error)
	// ShouldDelegateConsistentRead decides for a request with no resource version set.
	ShouldDelegateConsistentRead() (Result, error)
}
// Result is the outcome of a delegation decision made by ShouldDelegateList or
// by a Helper implementation.
type Result struct {
	// Whether a request cannot be served by cache and should be delegated to etcd.
	ShouldDelegate bool
	// Whether a request is a consistent read, used by delegator to decide if it should call GetCurrentResourceVersion to get RV.
	// Included in interface as only cacher has keyPrefix needed to parse continue token.
	ConsistentRead bool
}
// CacheWithoutSnapshots is a Helper that always delegates exact resource
// version and continuation requests, and delegates consistent reads only when
// ConsistentReadSupported reports false.
type CacheWithoutSnapshots struct{}

// Compile-time check that CacheWithoutSnapshots satisfies Helper.
var _ Helper = CacheWithoutSnapshots{}
// ShouldDelegateContinue always delegates continuation requests. Both
// arguments are ignored.
func (c CacheWithoutSnapshots) ShouldDelegateContinue(continueToken string, recursive bool) (Result, error) {
	// Continue with negative RV is considered a consistent read, however token cannot be parsed without keyPrefix unavailable in staging/src/k8s.io/apiserver/pkg/util/flow_control/request/list_work_estimator.go.
	decision := Result{ShouldDelegate: true, ConsistentRead: false}
	return decision, nil
}
// ShouldDelegateExactRV always delegates requests for an exact resource
// version and never marks them as consistent reads. Both arguments are ignored.
func (c CacheWithoutSnapshots) ShouldDelegateExactRV(rv string, recursive bool) (Result, error) {
	decision := Result{ShouldDelegate: true, ConsistentRead: false}
	return decision, nil
}
// ShouldDelegateConsistentRead marks the request as a consistent read and
// delegates it only when ConsistentReadSupported reports false.
func (c CacheWithoutSnapshots) ShouldDelegateConsistentRead() (Result, error) {
	servedByCache := ConsistentReadSupported()
	decision := Result{
		ConsistentRead: true,
		ShouldDelegate: !servedByCache,
	}
	return decision, nil
}
// ConsistentReadSupported reports whether the cache can be used to serve reads
// at a resource version it has not yet observed, i.e. consistent reads. That
// requires both the ConsistentListFromCache feature gate to be enabled and
// etcd support for RequestWatchProgress.
// Function is located here to avoid import cycles between staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go and staging/src/k8s.io/apiserver/pkg/util/flow_control/request/list_work_estimator.go.
func ConsistentReadSupported() bool {
	gateEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
	progressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
	if !gateEnabled {
		return false
	}
	return progressSupported
}

View File

@ -176,6 +176,14 @@ var (
Help: "Counter for consistent reads from cache.",
StabilityLevel: compbasemetrics.ALPHA,
}, []string{"resource", "success", "fallback"})
// StorageConsistencyCheckTotal counts consistency checks between etcd and the
// watch cache, labelled by resource and by the status of the check.
StorageConsistencyCheckTotal = compbasemetrics.NewCounterVec(
	&compbasemetrics.CounterOpts{
		Namespace:      namespace,
		Name:           "storage_consistency_checks_total",
		Help:           "Counter for status of consistency checks between etcd and watch cache",
		StabilityLevel: compbasemetrics.INTERNAL,
	}, []string{"resource", "status"})
)
var registerMetrics sync.Once
@ -198,6 +206,7 @@ func Register() {
legacyregistry.MustRegister(WatchCacheInitializations)
legacyregistry.MustRegister(WatchCacheReadWait)
legacyregistry.MustRegister(ConsistentReadTotal)
legacyregistry.MustRegister(StorageConsistencyCheckTotal)
})
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package cacher
package progress
import (
"context"
@ -36,8 +36,8 @@ const (
progressRequestPeriod = 100 * time.Millisecond
)
func newConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory, contextMetadata metadata.MD) *conditionalProgressRequester {
pr := &conditionalProgressRequester{
func NewConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory, contextMetadata metadata.MD) *ConditionalProgressRequester {
pr := &ConditionalProgressRequester{
clock: clock,
requestWatchProgress: requestWatchProgress,
contextMetadata: contextMetadata,
@ -52,9 +52,9 @@ type TickerFactory interface {
NewTimer(time.Duration) clock.Timer
}
// conditionalProgressRequester will request progress notification if there
// ConditionalProgressRequester will request progress notification if there
// is a request waiting for watch cache to be fresh.
type conditionalProgressRequester struct {
type ConditionalProgressRequester struct {
clock TickerFactory
requestWatchProgress WatchProgressRequester
contextMetadata metadata.MD
@ -65,7 +65,7 @@ type conditionalProgressRequester struct {
stopped bool
}
func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
func (pr *ConditionalProgressRequester) Run(stopCh <-chan struct{}) {
ctx := wait.ContextForChannel(stopCh)
if pr.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, pr.contextMetadata)
@ -115,14 +115,14 @@ func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
}
}
func (pr *conditionalProgressRequester) Add() {
func (pr *ConditionalProgressRequester) Add() {
pr.mux.Lock()
defer pr.mux.Unlock()
pr.waiting += 1
pr.cond.Signal()
}
func (pr *conditionalProgressRequester) Remove() {
func (pr *ConditionalProgressRequester) Remove() {
pr.mux.Lock()
defer pr.mux.Unlock()
pr.waiting -= 1

View File

@ -20,6 +20,9 @@ import (
"context"
"fmt"
"sync"
"time"
"k8s.io/utils/clock"
)
type status int
@ -38,18 +41,26 @@ const (
// | ^
// └---------------------------┘
type ready struct {
state status // represent the state of the variable
state status // represent the state of the variable
lastErr error
generation int // represent the number of times we have transtioned to ready
lock sync.RWMutex // protect the state and generation variables
restartLock sync.Mutex // protect the transition from ready to pending where the channel is recreated
waitCh chan struct{} // blocks until is ready or stopped
clock clock.Clock
lastStateChangeTime time.Time
}
func newReady() *ready {
return &ready{
func newReady(c clock.Clock) *ready {
r := &ready{
waitCh: make(chan struct{}),
state: Pending,
clock: c,
}
r.updateLastStateChangeTimeLocked()
return r
}
// done close the channel once the state is Ready or Stopped
@ -77,8 +88,7 @@ func (r *ready) waitAndReadGeneration(ctx context.Context) (int, error) {
}
r.lock.RLock()
switch r.state {
case Pending:
if r.state == Pending {
// since we allow to switch between the states Pending and Ready
// if there is a quick transition from Pending -> Ready -> Pending
// a process that was waiting can get unblocked and see a Pending
@ -86,43 +96,65 @@ func (r *ready) waitAndReadGeneration(ctx context.Context) (int, error) {
// avoid an inconsistent state on the system, with some processes not
// waiting despite the state moved back to Pending.
r.lock.RUnlock()
case Ready:
generation := r.generation
r.lock.RUnlock()
return generation, nil
case Stopped:
r.lock.RUnlock()
return 0, fmt.Errorf("apiserver cacher is stopped")
default:
r.lock.RUnlock()
return 0, fmt.Errorf("unexpected apiserver cache state: %v", r.state)
continue
}
generation, err := r.readGenerationLocked()
r.lock.RUnlock()
return generation, err
}
}
// check returns true only if it is Ready.
func (r *ready) check() bool {
_, ok := r.checkAndReadGeneration()
return ok
// check returns the time elapsed since the state was last changed, and an
// error describing the current state; the error is nil only when the state is
// Ready. It is checkAndReadGeneration without the generation.
func (r *ready) check() (time.Duration, error) {
	_, elapsed, err := r.checkAndReadGeneration()
	return elapsed, err
}
// checkAndReadGeneration returns the current generation and whether it is Ready.
func (r *ready) checkAndReadGeneration() (int, bool) {
// checkAndReadGeneration returns the current generation, the time elapsed since the state was last changed and the current value.
func (r *ready) checkAndReadGeneration() (int, time.Duration, error) {
r.lock.RLock()
defer r.lock.RUnlock()
return r.generation, r.state == Ready
generation, err := r.readGenerationLocked()
return generation, r.clock.Since(r.lastStateChangeTime), err
}
// readGenerationLocked returns the current generation when the state is Ready.
// For every other state it returns 0 and an error describing that state; while
// Pending, the error wraps the last recorded error if there is one.
//
// Callers must hold r.lock.
func (r *ready) readGenerationLocked() (int, error) {
	switch r.state {
	case Ready:
		return r.generation, nil
	case Pending:
		if r.lastErr != nil {
			return 0, fmt.Errorf("storage is (re)initializing: %w", r.lastErr)
		}
		return 0, fmt.Errorf("storage is (re)initializing")
	case Stopped:
		return 0, fmt.Errorf("apiserver cacher is stopped")
	default:
		return 0, fmt.Errorf("unexpected apiserver cache state: %v", r.state)
	}
}
// setReady requests the transition to Ready and clears any recorded error.
// It is shorthand for set(true, nil).
func (r *ready) setReady() {
	r.set(true, nil)
}
// setError requests the transition to Pending and records err as the last
// error. It is shorthand for set(false, err).
func (r *ready) setError(err error) {
	r.set(false, err)
}
// set the state to Pending (false) or Ready (true), it does not have effect if the state is Stopped.
func (r *ready) set(ok bool) {
func (r *ready) set(ok bool, err error) {
r.lock.Lock()
defer r.lock.Unlock()
if r.state == Stopped {
return
}
r.lastErr = err
if ok && r.state == Pending {
r.state = Ready
r.generation++
r.updateLastStateChangeTimeLocked()
select {
case <-r.waitCh:
default:
@ -139,6 +171,7 @@ func (r *ready) set(ok bool) {
default:
}
r.state = Pending
r.updateLastStateChangeTimeLocked()
}
}
@ -148,6 +181,7 @@ func (r *ready) stop() {
defer r.lock.Unlock()
if r.state != Stopped {
r.state = Stopped
r.updateLastStateChangeTimeLocked()
}
select {
case <-r.waitCh:
@ -155,3 +189,7 @@ func (r *ready) stop() {
close(r.waitCh)
}
}
// updateLastStateChangeTimeLocked stamps lastStateChangeTime with the current
// time of r.clock. The "Locked" suffix indicates the caller is expected to
// hold r.lock (newReady calls it before the value is shared).
func (r *ready) updateLastStateChangeTimeLocked() {
	r.lastStateChangeTime = r.clock.Now()
}

View File

@ -19,8 +19,6 @@ package cacher
import (
"fmt"
"github.com/google/btree"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
@ -75,7 +73,9 @@ type storeIndexer interface {
}
type orderedLister interface {
ListPrefix(prefix, continueKey string, limit int) (items []interface{}, hasMore bool)
ListPrefix(prefix, continueKey string) []interface{}
Count(prefix, continueKey string) (count int)
Clone() orderedLister
}
func newStoreIndexer(indexers *cache.Indexers) storeIndexer {
@ -97,12 +97,6 @@ type storeElement struct {
Fields fields.Set
}
func (t *storeElement) Less(than btree.Item) bool {
return t.Key < than.(*storeElement).Key
}
var _ btree.Item = (*storeElement)(nil)
func storeElementKey(obj interface{}) (string, error) {
elem, ok := obj.(*storeElement)
if !ok {

View File

@ -18,7 +18,6 @@ package cacher
import (
"fmt"
"math"
"strings"
"sync"
@ -44,6 +43,20 @@ type threadedStoreIndexer struct {
indexer indexer
}
var _ orderedLister = (*threadedStoreIndexer)(nil)
// Count returns the underlying store's Count(prefix, continueKey) while
// holding the read lock.
func (si *threadedStoreIndexer) Count(prefix, continueKey string) (count int) {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.Count(prefix, continueKey)
}
// Clone returns a clone of the underlying store, taken while holding the read
// lock. The returned orderedLister is the store's clone itself, not a
// threadedStoreIndexer, so it is not guarded by si.lock.
func (si *threadedStoreIndexer) Clone() orderedLister {
	si.lock.RLock()
	defer si.lock.RUnlock()
	return si.store.Clone()
}
func (si *threadedStoreIndexer) Add(obj interface{}) error {
return si.addOrUpdate(obj)
}
@ -73,11 +86,11 @@ func (si *threadedStoreIndexer) Delete(obj interface{}) error {
}
si.lock.Lock()
defer si.lock.Unlock()
oldObj := si.store.deleteElem(storeElem)
if oldObj == nil {
oldObj, existed := si.store.deleteElem(storeElem)
if !existed {
return nil
}
return si.indexer.updateElem(storeElem.Key, oldObj.(*storeElement), nil)
return si.indexer.updateElem(storeElem.Key, oldObj, nil)
}
func (si *threadedStoreIndexer) List() []interface{} {
@ -86,10 +99,10 @@ func (si *threadedStoreIndexer) List() []interface{} {
return si.store.List()
}
func (si *threadedStoreIndexer) ListPrefix(prefix, continueKey string, limit int) ([]interface{}, bool) {
func (si *threadedStoreIndexer) ListPrefix(prefix, continueKey string) []interface{} {
si.lock.RLock()
defer si.lock.RUnlock()
return si.store.ListPrefix(prefix, continueKey, limit)
return si.store.ListPrefix(prefix, continueKey)
}
func (si *threadedStoreIndexer) ListKeys() []string {
@ -128,12 +141,20 @@ func (si *threadedStoreIndexer) ByIndex(indexName, indexValue string) ([]interfa
func newBtreeStore(degree int) btreeStore {
return btreeStore{
tree: btree.New(degree),
tree: btree.NewG(degree, func(a, b *storeElement) bool {
return a.Key < b.Key
}),
}
}
type btreeStore struct {
tree *btree.BTree
tree *btree.BTreeG[*storeElement]
}
// Clone returns a new btreeStore backed by a clone of s.tree, as produced by
// the B-tree's own Clone method.
func (s *btreeStore) Clone() orderedLister {
	return &btreeStore{
		tree: s.tree.Clone(),
	}
}
func (s *btreeStore) Add(obj interface{}) error {
@ -172,14 +193,14 @@ func (s *btreeStore) Delete(obj interface{}) error {
return nil
}
func (s *btreeStore) deleteElem(storeElem *storeElement) interface{} {
func (s *btreeStore) deleteElem(storeElem *storeElement) (*storeElement, bool) {
return s.tree.Delete(storeElem)
}
func (s *btreeStore) List() []interface{} {
items := make([]interface{}, 0, s.tree.Len())
s.tree.Ascend(func(i btree.Item) bool {
items = append(items, i.(interface{}))
s.tree.Ascend(func(item *storeElement) bool {
items = append(items, item)
return true
})
return items
@ -187,8 +208,8 @@ func (s *btreeStore) List() []interface{} {
func (s *btreeStore) ListKeys() []string {
items := make([]string, 0, s.tree.Len())
s.tree.Ascend(func(i btree.Item) bool {
items = append(items, i.(*storeElement).Key)
s.tree.Ascend(func(item *storeElement) bool {
items = append(items, item.Key)
return true
})
return items
@ -199,11 +220,8 @@ func (s *btreeStore) Get(obj interface{}) (item interface{}, exists bool, err er
if !ok {
return nil, false, fmt.Errorf("obj is not a storeElement")
}
item = s.tree.Get(storeElem)
if item == nil {
return nil, false, nil
}
return item, true, nil
item, exists = s.tree.Get(storeElem)
return item, exists, nil
}
func (s *btreeStore) GetByKey(key string) (item interface{}, exists bool, err error) {
@ -225,54 +243,37 @@ func (s *btreeStore) Replace(objs []interface{}, _ string) error {
// addOrUpdateLocked assumes a lock is held and is used for Add
// and Update operations.
func (s *btreeStore) addOrUpdateElem(storeElem *storeElement) *storeElement {
oldObj := s.tree.ReplaceOrInsert(storeElem)
if oldObj == nil {
return nil
}
return oldObj.(*storeElement)
oldObj, _ := s.tree.ReplaceOrInsert(storeElem)
return oldObj
}
func (s *btreeStore) getByKey(key string) (item interface{}, exists bool, err error) {
keyElement := &storeElement{Key: key}
item = s.tree.Get(keyElement)
return item, item != nil, nil
item, exists = s.tree.Get(keyElement)
return item, exists, nil
}
func (s *btreeStore) ListPrefix(prefix, continueKey string, limit int) ([]interface{}, bool) {
if limit < 0 {
return nil, false
}
func (s *btreeStore) ListPrefix(prefix, continueKey string) []interface{} {
if continueKey == "" {
continueKey = prefix
}
var result []interface{}
var hasMore bool
if limit == 0 {
limit = math.MaxInt
}
s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(i btree.Item) bool {
elementKey := i.(*storeElement).Key
if !strings.HasPrefix(elementKey, prefix) {
s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(item *storeElement) bool {
if !strings.HasPrefix(item.Key, prefix) {
return false
}
// TODO: Might be worth to lookup one more item to provide more accurate HasMore.
if len(result) >= limit {
hasMore = true
return false
}
result = append(result, i.(interface{}))
result = append(result, item)
return true
})
return result, hasMore
return result
}
func (s *btreeStore) Count(prefix, continueKey string) (count int) {
if continueKey == "" {
continueKey = prefix
}
s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(i btree.Item) bool {
elementKey := i.(*storeElement).Key
if !strings.HasPrefix(elementKey, prefix) {
s.tree.AscendGreaterOrEqual(&storeElement{Key: continueKey}, func(item *storeElement) bool {
if !strings.HasPrefix(item.Key, prefix) {
return false
}
count++
@ -391,3 +392,114 @@ func (i *indexer) delete(key, value string, index map[string]map[string]*storeEl
delete(index, value)
}
}
// newStoreSnapshotter returns a storeSnapshotter that stores snapshots for
// serving read requests with exact resource versions (RV) and pagination.
//
// Snapshots are created by calling the Clone method on orderedLister, which is
// expected to be fast and efficient thanks to the use of B-trees.
// B-trees can create a lazy copy of the tree structure, minimizing overhead.
//
// Assuming the watch cache observes all events and snapshots the cache after
// each of them, requests for a specific resource version can be served by
// retrieving the snapshot with the greatest RV less than or equal to the
// requested RV. To make snapshot retrieval efficient we need an ordered data
// structure, such as a tree.
//
// The initial implementation uses a B-tree to achieve the following performance characteristics (n - number of snapshots stored):
//   - `Add`: Adds a new snapshot.
//     Complexity: O(log n).
//     Executed for each watch event observed by the cache.
//   - `GetLessOrEqual`: Retrieves the snapshot with the greatest RV less than or equal to the requested RV.
//     Complexity: O(log n).
//     Executed for each LIST request with match=Exact or continuation.
//   - `RemoveLess`: Cleans up snapshots outside the watch history window.
//     Complexity: O(k log n), k - number of snapshots to remove, usually only one if watch capacity was not reduced.
//     Executed per watch event observed when the cache is full.
//   - `Reset`: Cleans up all snapshots.
//     Complexity: O(1).
//     Executed when the watch cache is reinitialized.
//
// Further optimization is possible by leveraging the property that adds always
// increase the maximum RV and deletes only increase the minimum RV.
// For example, a binary search on a cyclic buffer of (RV, snapshot)
// should reduce the number of allocations and improve removal complexity.
// However, this solution is more complex and is deferred for future implementation.
//
// TODO: Rewrite to use a cyclic buffer
func newStoreSnapshotter() *storeSnapshotter {
	// Snapshots are ordered by the resource version they were taken at.
	byResourceVersion := func(a, b rvSnapshot) bool {
		return a.resourceVersion < b.resourceVersion
	}
	return &storeSnapshotter{
		snapshots: btree.NewG[rvSnapshot](btreeDegree, byResourceVersion),
	}
}
var _ Snapshotter = (*storeSnapshotter)(nil)
// Snapshotter keeps snapshots of an orderedLister keyed by resource version.
type Snapshotter interface {
	// Reset drops all stored snapshots.
	Reset()
	// GetLessOrEqual returns the snapshot with the greatest resource version
	// that is less than or equal to rv, and whether one was found.
	GetLessOrEqual(rv uint64) (orderedLister, bool)
	// Add stores a snapshot of indexer under resource version rv.
	Add(rv uint64, indexer orderedLister)
	// RemoveLess drops all snapshots with a resource version lower than rv.
	RemoveLess(rv uint64)
	// Len returns the number of stored snapshots.
	Len() int
}
// storeSnapshotter is the B-tree backed Snapshotter implementation.
type storeSnapshotter struct {
	// mux guards snapshots.
	mux sync.RWMutex
	// snapshots holds the stored snapshots ordered by resource version.
	snapshots *btree.BTreeG[rvSnapshot]
}
// rvSnapshot is a single entry of storeSnapshotter: a snapshot together with
// the resource version it is stored under.
type rvSnapshot struct {
	// resourceVersion is the ordering key of the entry.
	resourceVersion uint64
	// snapshot is the cloned orderedLister stored for resourceVersion.
	snapshot orderedLister
}
// Reset removes all stored snapshots while holding the write lock.
func (s *storeSnapshotter) Reset() {
	s.mux.Lock()
	defer s.mux.Unlock()
	// false: do not hand the cleared nodes back to the tree's freelist.
	s.snapshots.Clear(false)
}
// GetLessOrEqual returns the snapshot stored under the greatest resource
// version that does not exceed rv. The boolean is false, and the lister nil,
// when no such snapshot exists.
func (s *storeSnapshotter) GetLessOrEqual(rv uint64) (orderedLister, bool) {
	s.mux.RLock()
	defer s.mux.RUnlock()

	var (
		match orderedLister
		found bool
	)
	pivot := rvSnapshot{resourceVersion: rv}
	s.snapshots.DescendLessOrEqual(pivot, func(candidate rvSnapshot) bool {
		// The first entry visited is the closest one at or below rv; stop there.
		match, found = candidate.snapshot, true
		return false
	})
	return match, found
}
// Add stores a clone of indexer under resource version rv, replacing any
// snapshot already stored for that resource version.
func (s *storeSnapshotter) Add(rv uint64, indexer orderedLister) {
	s.mux.Lock()
	defer s.mux.Unlock()
	s.snapshots.ReplaceOrInsert(rvSnapshot{resourceVersion: rv, snapshot: indexer.Clone()})
}
// RemoveLess deletes every snapshot whose resource version is strictly lower
// than rv. Snapshots at or above rv are kept.
func (s *storeSnapshotter) RemoveLess(rv uint64) {
	s.mux.Lock()
	defer s.mux.Unlock()

	for {
		oldest, ok := s.snapshots.Min()
		// Stop once the tree is empty or the oldest entry is already at or above rv.
		if !ok || oldest.resourceVersion >= rv {
			return
		}
		s.snapshots.DeleteMin()
	}
}
// Len returns the number of snapshots currently stored, read under the read lock.
func (s *storeSnapshotter) Len() int {
	s.mux.RLock()
	defer s.mux.RUnlock()
	return s.snapshots.Len()
}

View File

@ -17,7 +17,9 @@ limitations under the License.
package cacher
import (
"math"
"strings"
"time"
)
// hasPathPrefix returns true if the string matches pathPrefix exactly, or if is prefixed with pathPrefix at a path segment boundary
@ -44,3 +46,11 @@ func hasPathPrefix(s, pathPrefix string) bool {
}
return false
}
// calculateRetryAfterForUnreadyCache derives a retry-after value from how long
// the cache has been down. The value grows exponentially with the downtime
// (e^(0.06*seconds)), is clamped to the range [1, 30] and truncated to an int.
func calculateRetryAfterForUnreadyCache(downtime time.Duration) int {
	const (
		growthFactor = 0.06
		lowerBound   = 1
		upperBound   = 30
	)
	raw := math.Exp(growthFactor * downtime.Seconds())
	clamped := math.Max(lowerBound, math.Min(upperBound, raw))
	return int(clamped)
}

View File

@ -25,6 +25,7 @@ import (
"time"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
@ -32,8 +33,9 @@ import (
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/cacher/delegator"
"k8s.io/apiserver/pkg/storage/cacher/metrics"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
"k8s.io/apiserver/pkg/storage/cacher/progress"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/cache"
"k8s.io/component-base/tracing"
@ -52,17 +54,11 @@ const (
// after receiving a 'too high resource version' error.
resourceVersionTooHighRetrySeconds = 1
// eventFreshDuration is time duration of events we want to keep.
// We set it to `defaultBookmarkFrequency` plus epsilon to maximize
// chances that last bookmark was sent within kept history, at the
// same time, minimizing the needed memory usage.
eventFreshDuration = 75 * time.Second
// defaultLowerBoundCapacity is a default value for event cache capacity's lower bound.
// TODO: Figure out, to what value we can decreased it.
defaultLowerBoundCapacity = 100
// defaultUpperBoundCapacity should be able to keep eventFreshDuration of history.
// defaultUpperBoundCapacity should be able to keep the required history.
defaultUpperBoundCapacity = 100 * 1024
)
@ -142,6 +138,9 @@ type watchCache struct {
// for testing timeouts.
clock clock.Clock
// eventFreshDuration defines the minimum watch history watchcache will store.
eventFreshDuration time.Duration
// An underlying storage.Versioner.
versioner storage.Versioner
@ -153,7 +152,10 @@ type watchCache struct {
// Requests progress notification if there are requests waiting for watch
// to be fresh
waitingUntilFresh *conditionalProgressRequester
waitingUntilFresh *progress.ConditionalProgressRequester
// Stores previous snapshots of orderedLister to allow serving requests from previous revisions.
snapshots Snapshotter
}
func newWatchCache(
@ -163,15 +165,16 @@ func newWatchCache(
versioner storage.Versioner,
indexers *cache.Indexers,
clock clock.WithTicker,
eventFreshDuration time.Duration,
groupResource schema.GroupResource,
progressRequester *conditionalProgressRequester) *watchCache {
progressRequester *progress.ConditionalProgressRequester) *watchCache {
wc := &watchCache{
capacity: defaultLowerBoundCapacity,
keyFunc: keyFunc,
getAttrsFunc: getAttrsFunc,
cache: make([]*watchCacheEvent, defaultLowerBoundCapacity),
lowerBoundCapacity: defaultLowerBoundCapacity,
upperBoundCapacity: defaultUpperBoundCapacity,
upperBoundCapacity: capacityUpperBound(eventFreshDuration),
startIndex: 0,
endIndex: 0,
store: newStoreIndexer(indexers),
@ -179,10 +182,14 @@ func newWatchCache(
listResourceVersion: 0,
eventHandler: eventHandler,
clock: clock,
eventFreshDuration: eventFreshDuration,
versioner: versioner,
groupResource: groupResource,
waitingUntilFresh: progressRequester,
}
if utilfeature.DefaultFeatureGate.Enabled(features.ListFromCacheSnapshot) {
wc.snapshots = newStoreSnapshotter()
}
metrics.WatchCacheCapacity.WithLabelValues(groupResource.String()).Set(float64(wc.capacity))
wc.cond = sync.NewCond(wc.RLocker())
wc.indexValidator = wc.isIndexValidLocked
@ -190,6 +197,30 @@ func newWatchCache(
return wc
}
// capacityUpperBound returns the largest capacity the watch cache may grow to
// for the given eventFreshDuration.
//
// Durations up to DefaultEventFreshDuration get defaultUpperBoundCapacity.
// Longer durations mean more events must be retained under high churn, so the
// bound is scaled by the ratio eventFreshDuration/DefaultEventFreshDuration.
// Because the cache only ever doubles in size, that ratio is rounded up to the
// next power of two. The result is capped so it still fits in a 32-bit integer.
func capacityUpperBound(eventFreshDuration time.Duration) int {
	if eventFreshDuration <= DefaultEventFreshDuration {
		return defaultUpperBoundCapacity
	}

	ratio := eventFreshDuration.Seconds() / DefaultEventFreshDuration.Seconds()
	exponent := int(math.Ceil(math.Log2(ratio)))

	// Largest shift that keeps defaultUpperBoundCapacity<<exponent within int32.
	maxExponent := int(math.Floor(math.Log2(math.MaxInt32 / defaultUpperBoundCapacity)))
	if exponent > maxExponent {
		exponent = maxExponent
		klog.Warningf("Capping watch cache capacity upper bound to %v", defaultUpperBoundCapacity<<exponent)
	}
	return defaultUpperBoundCapacity << exponent
}
// Add takes runtime.Object as an argument.
func (w *watchCache) Add(obj interface{}) error {
object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
@ -287,7 +318,20 @@ func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, upd
w.resourceVersion = resourceVersion
defer w.cond.Broadcast()
return updateFunc(elem)
err := updateFunc(elem)
if err != nil {
return err
}
if w.snapshots != nil {
if orderedLister, ordered := w.store.(orderedLister); ordered {
if w.isCacheFullLocked() {
oldestRV := w.cache[w.startIndex%w.capacity].ResourceVersion
w.snapshots.RemoveLess(oldestRV)
}
w.snapshots.Add(w.resourceVersion, orderedLister)
}
}
return err
}(); err != nil {
return err
}
@ -319,14 +363,14 @@ func (w *watchCache) updateCache(event *watchCacheEvent) {
// - increases capacity by 2x if cache is full and all cached events occurred within last eventFreshDuration.
// - decreases capacity by 2x when recent quarter of events occurred outside of eventFreshDuration(protect watchCache from flapping).
func (w *watchCache) resizeCacheLocked(eventTime time.Time) {
if w.isCacheFullLocked() && eventTime.Sub(w.cache[w.startIndex%w.capacity].RecordTime) < eventFreshDuration {
if w.isCacheFullLocked() && eventTime.Sub(w.cache[w.startIndex%w.capacity].RecordTime) < w.eventFreshDuration {
capacity := min(w.capacity*2, w.upperBoundCapacity)
if capacity > w.capacity {
w.doCacheResizeLocked(capacity)
}
return
}
if w.isCacheFullLocked() && eventTime.Sub(w.cache[(w.endIndex-w.capacity/4)%w.capacity].RecordTime) > eventFreshDuration {
if w.isCacheFullLocked() && eventTime.Sub(w.cache[(w.endIndex-w.capacity/4)%w.capacity].RecordTime) > w.eventFreshDuration {
capacity := max(w.capacity/2, w.lowerBoundCapacity)
if capacity < w.capacity {
w.doCacheResizeLocked(capacity)
@ -452,9 +496,8 @@ func (s sortableStoreElements) Swap(i, j int) {
// WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
// with their ResourceVersion and the name of the index, if any, that was used.
func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, matchValues []storage.MatchValue) (result []interface{}, rv uint64, index string, err error) {
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && requestWatchProgressSupported && w.notFresh(resourceVersion) {
func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, key string, opts storage.ListOptions) (resp listResp, index string, err error) {
if delegator.ConsistentReadSupported() && w.notFresh(resourceVersion) {
w.waitingUntilFresh.Add()
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
w.waitingUntilFresh.Remove()
@ -464,32 +507,84 @@ func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion
defer w.RUnlock()
if err != nil {
return result, rv, index, err
return listResp{}, "", err
}
var prefixFilteredAndOrdered bool
result, rv, index, prefixFilteredAndOrdered, err = func() ([]interface{}, uint64, string, bool, error) {
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
// requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we
// want - they will be filtered out later. The fact that we return less things is only further performance improvement.
// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
for _, matchValue := range matchValues {
if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
return result, w.resourceVersion, matchValue.IndexName, false, nil
return w.list(ctx, resourceVersion, key, opts)
}
// NOTICE: Structure follows the shouldDelegateList function in
// staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go
func (w *watchCache) list(ctx context.Context, resourceVersion uint64, key string, opts storage.ListOptions) (resp listResp, index string, err error) {
switch opts.ResourceVersionMatch {
case metav1.ResourceVersionMatchExact:
return w.listExactRV(key, "", resourceVersion)
case metav1.ResourceVersionMatchNotOlderThan:
case "":
// Continue
if len(opts.Predicate.Continue) > 0 {
continueKey, continueRV, err := storage.DecodeContinue(opts.Predicate.Continue, key)
if err != nil {
return listResp{}, "", errors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
}
if continueRV > 0 {
return w.listExactRV(key, continueKey, uint64(continueRV))
} else {
// Continue with negative RV is a consistent read - already handled via waitUntilFreshAndBlock.
// Don't pass matchValues as they don't support continueKey
return w.listLatestRV(key, continueKey, nil)
}
}
if store, ok := w.store.(orderedLister); ok {
result, _ := store.ListPrefix(key, "", 0)
return result, w.resourceVersion, "", true, nil
// Legacy exact match
if opts.Predicate.Limit > 0 && len(opts.ResourceVersion) > 0 && opts.ResourceVersion != "0" {
return w.listExactRV(key, "", resourceVersion)
}
return w.store.List(), w.resourceVersion, "", false, nil
}()
if !prefixFilteredAndOrdered {
result, err = filterPrefixAndOrder(key, result)
if err != nil {
return nil, 0, "", err
// Consistent Read - already handled via waitUntilFreshAndBlock
}
return w.listLatestRV(key, "", opts.Predicate.MatcherIndex(ctx))
}
// listExactRV serves a list from a stored snapshot rather than from the live
// store. It looks up a snapshot via w.snapshots.GetLessOrEqual(resourceVersion)
// and lists the entries under key, resuming from continueKey.
//
// The returned listResp carries the requested resourceVersion, and the index
// name is always empty. A "resource expired" API error is returned when
// snapshots are not configured (w.snapshots is nil) or when no snapshot is
// found for the requested resourceVersion.
func (w *watchCache) listExactRV(key, continueKey string, resourceVersion uint64) (resp listResp, index string, err error) {
	if w.snapshots != nil {
		if snapshot, found := w.snapshots.GetLessOrEqual(resourceVersion); found {
			return listResp{
				Items:           snapshot.ListPrefix(key, continueKey),
				ResourceVersion: resourceVersion,
			}, "", nil
		}
	}
	// Either snapshots are disabled or nothing usable is retained for this RV.
	return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
}
func (w *watchCache) listLatestRV(key, continueKey string, matchValues []storage.MatchValue) (resp listResp, index string, err error) {
// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
// requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we
// want - they will be filtered out later. The fact that we return less things is only further performance improvement.
// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
for _, matchValue := range matchValues {
if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
result, err = filterPrefixAndOrder(key, result)
return listResp{
Items: result,
ResourceVersion: w.resourceVersion,
}, matchValue.IndexName, err
}
}
return result, w.resourceVersion, index, nil
if store, ok := w.store.(orderedLister); ok {
result := store.ListPrefix(key, continueKey)
return listResp{
Items: result,
ResourceVersion: w.resourceVersion,
}, "", nil
}
result := w.store.List()
result, err = filterPrefixAndOrder(key, result)
return listResp{
Items: result,
ResourceVersion: w.resourceVersion,
}, "", err
}
func filterPrefixAndOrder(prefix string, items []interface{}) ([]interface{}, error) {
@ -517,7 +612,7 @@ func (w *watchCache) notFresh(resourceVersion uint64) bool {
// WaitUntilFreshAndGet returns a pointer to a <storeElement> object.
func (w *watchCache) WaitUntilFreshAndGet(ctx context.Context, resourceVersion uint64, key string) (interface{}, bool, uint64, error) {
var err error
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && w.notFresh(resourceVersion) {
if delegator.ConsistentReadSupported() && w.notFresh(resourceVersion) {
w.waitingUntilFresh.Add()
err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
w.waitingUntilFresh.Remove()
@ -600,6 +695,12 @@ func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error {
if err := w.store.Replace(toReplace, resourceVersion); err != nil {
return err
}
if w.snapshots != nil {
w.snapshots.Reset()
if orderedLister, ordered := w.store.(orderedLister); ordered {
w.snapshots.Add(version, orderedLister)
}
}
w.listResourceVersion = version
w.resourceVersion = version
if w.onReplace != nil {
@ -660,7 +761,7 @@ func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) in
// We don't have an exact data, but given we store updates from
// the last <eventFreshDuration>, we approach it by dividing the
// capacity by the length of the history window.
chanSize := int(math.Ceil(float64(w.currentCapacity()) / eventFreshDuration.Seconds()))
chanSize := int(math.Ceil(float64(w.currentCapacity()) / w.eventFreshDuration.Seconds()))
// Finally we adjust the size to avoid ending with too low or
// too large values.
@ -751,7 +852,7 @@ func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64, key string,
// that covers the entire storage state.
// This function assumes to be called under the watchCache lock.
func (w *watchCache) getIntervalFromStoreLocked(key string, matchesSingle bool) (*watchCacheInterval, error) {
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc, key, matchesSingle)
ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, key, matchesSingle)
if err != nil {
return nil, err
}

View File

@ -21,9 +21,6 @@ import (
"sort"
"sync"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
)
@ -106,7 +103,6 @@ type watchCacheInterval struct {
initialEventsEndBookmark *watchCacheEvent
}
type attrFunc func(runtime.Object) (labels.Set, fields.Set, error)
type indexerFunc func(int) *watchCacheEvent
type indexValidator func(int) bool
@ -140,10 +136,9 @@ func (s sortableWatchCacheEvents) Swap(i, j int) {
// returned by Next() need to be events from a List() done on the underlying store of
// the watch cache.
// The items returned in the interval will be sorted by Key.
func newCacheIntervalFromStore(resourceVersion uint64, store storeIndexer, getAttrsFunc attrFunc, key string, matchesSingle bool) (*watchCacheInterval, error) {
func newCacheIntervalFromStore(resourceVersion uint64, store storeIndexer, key string, matchesSingle bool) (*watchCacheInterval, error) {
buffer := &watchCacheIntervalBuffer{}
var allItems []interface{}
if matchesSingle {
item, exists, err := store.GetByKey(key)
if err != nil {
@ -162,15 +157,11 @@ func newCacheIntervalFromStore(resourceVersion uint64, store storeIndexer, getAt
if !ok {
return nil, fmt.Errorf("not a storeElement: %v", elem)
}
objLabels, objFields, err := getAttrsFunc(elem.Object)
if err != nil {
return nil, err
}
buffer.buffer[i] = &watchCacheEvent{
Type: watch.Added,
Object: elem.Object,
ObjLabels: objLabels,
ObjFields: objFields,
ObjLabels: elem.Labels,
ObjFields: elem.Fields,
Key: elem.Key,
ResourceVersion: resourceVersion,
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Interfaces for database-related operations.
package storage // import "k8s.io/apiserver/pkg/storage"
package storage

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package storage provides conversion of storage errors to API errors.
package errors // import "k8s.io/apiserver/pkg/storage/errors"
package errors

View File

@ -22,6 +22,7 @@ import (
"fmt"
"path"
"reflect"
"strconv"
"strings"
"time"
@ -32,9 +33,10 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/conversion"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
@ -84,6 +86,9 @@ type store struct {
leaseManager *leaseManager
decoder Decoder
listErrAggrFactory func() ListErrorAggregator
resourcePrefix string
newListFunc func() runtime.Object
}
func (s *store) RequestWatchProgress(ctx context.Context) error {
@ -185,10 +190,13 @@ func newStore(c *kubernetes.Client, codec runtime.Codec, newFunc, newListFunc fu
leaseManager: newDefaultLeaseManager(c.Client, leaseManagerConfig),
decoder: decoder,
listErrAggrFactory: listErrAggrFactory,
resourcePrefix: resourcePrefix,
newListFunc: newListFunc,
}
w.getCurrentStorageRV = func(ctx context.Context) (uint64, error) {
return storage.GetCurrentResourceVersionFromStorage(ctx, s, newListFunc, resourcePrefix, w.objectType)
return s.GetCurrentResourceVersion(ctx)
}
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) || utilfeature.DefaultFeatureGate.Enabled(features.WatchList) {
etcdfeature.DefaultFeatureSupportChecker.CheckClient(c.Ctx(), c, storage.RequestWatchProgress)
@ -636,45 +644,35 @@ func (s *store) ReadinessCheck() error {
return nil
}
// resolveGetListRev is used by GetList to resolve the rev to use in the client.KV.Get request.
func (s *store) resolveGetListRev(continueKey string, continueRV int64, opts storage.ListOptions) (int64, error) {
var withRev int64
// Uses continueRV if this is a continuation request.
if len(continueKey) > 0 {
if len(opts.ResourceVersion) > 0 && opts.ResourceVersion != "0" {
return withRev, apierrors.NewBadRequest("specifying resource version is not allowed when using continue")
}
// If continueRV > 0, the LIST request needs a specific resource version.
// continueRV==0 is invalid.
// If continueRV < 0, the request is for the latest resource version.
if continueRV > 0 {
withRev = continueRV
}
return withRev, nil
}
// Returns 0 if ResourceVersion is not specified.
if len(opts.ResourceVersion) == 0 {
return withRev, nil
}
parsedRV, err := s.versioner.ParseResourceVersion(opts.ResourceVersion)
if err != nil {
return withRev, apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
func (s *store) GetCurrentResourceVersion(ctx context.Context) (uint64, error) {
emptyList := s.newListFunc()
pred := storage.SelectionPredicate{
Label: labels.Everything(),
Field: fields.Everything(),
Limit: 1, // just in case we actually hit something
}
switch opts.ResourceVersionMatch {
case metav1.ResourceVersionMatchNotOlderThan:
// The not older than constraint is checked after we get a response from etcd,
// and returnedRV is then set to the revision we get from the etcd response.
case metav1.ResourceVersionMatchExact:
withRev = int64(parsedRV)
case "": // legacy case
if opts.Recursive && opts.Predicate.Limit > 0 && parsedRV > 0 {
withRev = int64(parsedRV)
}
default:
return withRev, fmt.Errorf("unknown ResourceVersionMatch value: %v", opts.ResourceVersionMatch)
err := s.GetList(ctx, s.resourcePrefix, storage.ListOptions{Predicate: pred}, emptyList)
if err != nil {
return 0, err
}
return withRev, nil
emptyListAccessor, err := meta.ListAccessor(emptyList)
if err != nil {
return 0, err
}
if emptyListAccessor == nil {
return 0, fmt.Errorf("unable to extract a list accessor from %T", emptyList)
}
currentResourceVersion, err := strconv.Atoi(emptyListAccessor.GetResourceVersion())
if err != nil {
return 0, err
}
if currentResourceVersion == 0 {
return 0, fmt.Errorf("the current resource version must be greater than 0")
}
return uint64(currentResourceVersion), nil
}
// GetList implements storage.Interface.
@ -713,15 +711,8 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption
paging := opts.Predicate.Limit > 0
newItemFunc := getNewItemFunc(listObj, v)
var continueRV, withRev int64
var continueKey string
if opts.Recursive && len(opts.Predicate.Continue) > 0 {
continueKey, continueRV, err = storage.DecodeContinue(opts.Predicate.Continue, keyPrefix)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
}
}
if withRev, err = s.resolveGetListRev(continueKey, continueRV, opts); err != nil {
withRev, continueKey, err := storage.ValidateListOptions(keyPrefix, s.versioner, opts)
if err != nil {
return err
}

View File

@ -438,7 +438,12 @@ func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
for {
select {
case e := <-wc.incomingEventChan:
res := wc.transform(e)
res, err := wc.transform(e)
if err != nil {
wc.sendError(err)
return
}
if res == nil {
continue
}
@ -461,10 +466,8 @@ func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) {
p := concurrentOrderedEventProcessing{
input: wc.incomingEventChan,
processFunc: wc.transform,
output: wc.resultChan,
processingQueue: make(chan chan *watch.Event, processEventConcurrency-1),
wc: wc,
processingQueue: make(chan chan *processingResult, processEventConcurrency-1),
objectType: wc.watcher.objectType,
groupResource: wc.watcher.groupResource,
@ -481,12 +484,15 @@ func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) {
}()
}
type concurrentOrderedEventProcessing struct {
input chan *event
processFunc func(*event) *watch.Event
output chan watch.Event
// processingResult carries the outcome of transforming a single incoming
// event: the resulting watch event and the error returned alongside it.
type processingResult struct {
// event is the transformed watch event; consumers skip the result when it is nil.
event *watch.Event
// err is the error returned by the transformation, if any.
err error
}
processingQueue chan chan *watch.Event
type concurrentOrderedEventProcessing struct {
wc *watchChan
processingQueue chan chan *processingResult
// Metadata for logging
objectType string
groupResource schema.GroupResource
@ -498,28 +504,29 @@ func (p *concurrentOrderedEventProcessing) scheduleEventProcessing(ctx context.C
select {
case <-ctx.Done():
return
case e = <-p.input:
case e = <-p.wc.incomingEventChan:
}
processingResponse := make(chan *watch.Event, 1)
processingResponse := make(chan *processingResult, 1)
select {
case <-ctx.Done():
return
case p.processingQueue <- processingResponse:
}
wg.Add(1)
go func(e *event, response chan<- *watch.Event) {
go func(e *event, response chan<- *processingResult) {
defer wg.Done()
responseEvent, err := p.wc.transform(e)
select {
case <-ctx.Done():
case response <- p.processFunc(e):
case response <- &processingResult{event: responseEvent, err: err}:
}
}(e, processingResponse)
}
}
func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Context) {
var processingResponse chan *watch.Event
var e *watch.Event
var processingResponse chan *processingResult
var r *processingResult
for {
select {
case <-ctx.Done():
@ -529,21 +536,25 @@ func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Co
select {
case <-ctx.Done():
return
case e = <-processingResponse:
case r = <-processingResponse:
}
if e == nil {
if r.err != nil {
p.wc.sendError(r.err)
return
}
if r.event == nil {
continue
}
if len(p.output) == cap(p.output) {
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.objectType, "groupResource", p.groupResource)
if len(p.wc.resultChan) == cap(p.wc.resultChan) {
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.wc.watcher.objectType, "groupResource", p.wc.watcher.groupResource)
}
// If user couldn't receive results fast enough, we also block incoming events from watcher.
// Because storing events in local will cause more memory usage.
// The worst case would be closing the fast watcher.
select {
case <-ctx.Done():
case p.wc.resultChan <- *r.event:
case <-p.wc.ctx.Done():
return
case p.output <- *e:
}
}
}
@ -561,12 +572,11 @@ func (wc *watchChan) acceptAll() bool {
}
// transform transforms an event into a result for user if not filtered.
func (wc *watchChan) transform(e *event) (res *watch.Event) {
func (wc *watchChan) transform(e *event) (res *watch.Event, err error) {
curObj, oldObj, err := wc.prepareObjs(e)
if err != nil {
klog.Errorf("failed to prepare current and previous objects: %v", err)
wc.sendError(err)
return nil
return nil, err
}
switch {
@ -574,12 +584,11 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
object := wc.watcher.newFunc()
if err := wc.watcher.versioner.UpdateObject(object, uint64(e.rev)); err != nil {
klog.Errorf("failed to propagate object version: %v", err)
return nil
return nil, fmt.Errorf("failed to propagate object resource version: %w", err)
}
if e.isInitialEventsEndBookmark {
if err := storage.AnnotateInitialEventsEndBookmark(object); err != nil {
wc.sendError(fmt.Errorf("error while accessing object's metadata gr: %v, type: %v, obj: %#v, err: %v", wc.watcher.groupResource, wc.watcher.objectType, object, err))
return nil
return nil, fmt.Errorf("error while accessing object's metadata gr: %v, type: %v, obj: %#v, err: %w", wc.watcher.groupResource, wc.watcher.objectType, object, err)
}
}
res = &watch.Event{
@ -588,7 +597,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
}
case e.isDeleted:
if !wc.filter(oldObj) {
return nil
return nil, nil
}
res = &watch.Event{
Type: watch.Deleted,
@ -596,7 +605,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
}
case e.isCreated:
if !wc.filter(curObj) {
return nil
return nil, nil
}
res = &watch.Event{
Type: watch.Added,
@ -608,7 +617,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
Type: watch.Modified,
Object: curObj,
}
return res
return res, nil
}
curObjPasses := wc.filter(curObj)
oldObjPasses := wc.filter(oldObj)
@ -630,7 +639,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
}
}
}
return res
return res, nil
}
func transformErrorToEvent(err error) *watch.Event {

View File

@ -20,6 +20,7 @@ import (
"context"
"fmt"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@ -262,6 +263,10 @@ type Interface interface {
// TODO: Remove when storage.Interface will be separate from etcd3.store.
// Deprecated: Added temporarily to simplify exposing RequestProgress for watch cache.
RequestWatchProgress(ctx context.Context) error
// GetCurrentResourceVersion gets the current resource version from etcd.
// This method issues an empty list request and reads only the ResourceVersion from the object metadata
GetCurrentResourceVersion(ctx context.Context) (uint64, error)
}
// GetOptions provides the options that may be provided for storage get operations.
@ -325,3 +330,43 @@ type DeleteOptions struct {
// object which otherwise can not be deleted using the normal flow
IgnoreStoreReadError bool
}
// ValidateListOptions validates the continue token and resource version of a
// list request and resolves the revision the list should be served at.
//
// For recursive requests carrying a continue token, the token is decoded
// against keyPrefix and the decoded key is returned as continueKey; combining
// a continue token with a resource version other than "" or "0" is rejected
// as a bad request. Otherwise the resource version is parsed with versioner
// and interpreted according to opts.ResourceVersionMatch.
//
// The returned withRev is 0 when no specific revision is required.
func ValidateListOptions(keyPrefix string, versioner Versioner, opts ListOptions) (withRev int64, continueKey string, err error) {
	if opts.Recursive && len(opts.Predicate.Continue) > 0 {
		decodedKey, decodedRV, decodeErr := DecodeContinue(opts.Predicate.Continue, keyPrefix)
		if decodeErr != nil {
			return 0, "", apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", decodeErr))
		}
		if rv := opts.ResourceVersion; rv != "" && rv != "0" {
			return 0, "", apierrors.NewBadRequest("specifying resource version is not allowed when using continue")
		}
		// If the decoded RV > 0, the LIST request needs that specific resource version.
		// A decoded RV of 0 is invalid.
		// If the decoded RV < 0, the request is for the latest resource version.
		if decodedRV <= 0 {
			return 0, decodedKey, nil
		}
		return decodedRV, decodedKey, nil
	}

	// No resource version specified: nothing to pin.
	if opts.ResourceVersion == "" {
		return 0, "", nil
	}

	parsedRV, parseErr := versioner.ParseResourceVersion(opts.ResourceVersion)
	if parseErr != nil {
		return 0, "", apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", parseErr))
	}

	switch opts.ResourceVersionMatch {
	case metav1.ResourceVersionMatchExact:
		return int64(parsedRV), "", nil
	case metav1.ResourceVersionMatchNotOlderThan:
		// The not older than constraint is checked after we get a response from etcd,
		// and returnedRV is then set to the revision we get from the etcd response.
		return 0, "", nil
	case "": // legacy case
		if opts.Recursive && opts.Predicate.Limit > 0 && parsedRV > 0 {
			return int64(parsedRV), "", nil
		}
		return 0, "", nil
	default:
		return 0, "", fmt.Errorf("unknown ResourceVersionMatch value: %v", opts.ResourceVersionMatch)
	}
}

View File

@ -37,6 +37,7 @@ const (
DefaultCompactInterval = 5 * time.Minute
DefaultDBMetricPollInterval = 30 * time.Second
DefaultEventsHistoryWindow = 75 * time.Second
DefaultHealthcheckTimeout = 2 * time.Second
DefaultReadinessTimeout = 2 * time.Second
)
@ -80,6 +81,8 @@ type Config struct {
CountMetricPollPeriod time.Duration
// DBMetricPollInterval specifies how often should storage backend metric be updated.
DBMetricPollInterval time.Duration
// EventsHistoryWindow specifies minimum history duration that storage is keeping.
EventsHistoryWindow time.Duration
// HealthcheckTimeout specifies the timeout used when checking health
HealthcheckTimeout time.Duration
// ReadycheckTimeout specifies the timeout used when checking readiness
@ -115,6 +118,7 @@ func NewDefaultConfig(prefix string, codec runtime.Codec) *Config {
Codec: codec,
CompactionInterval: DefaultCompactInterval,
DBMetricPollInterval: DefaultDBMetricPollInterval,
EventsHistoryWindow: DefaultEventsHistoryWindow,
HealthcheckTimeout: DefaultHealthcheckTimeout,
ReadycheckTimeout: DefaultReadinessTimeout,
LeaseManagerConfig: etcd3.NewDefaultLeaseManagerConfig(),

View File

@ -17,16 +17,12 @@ limitations under the License.
package storage
import (
"context"
"fmt"
"strconv"
"sync/atomic"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/validation/path"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
)
@ -81,45 +77,6 @@ func (hwm *HighWaterMark) Update(current int64) bool {
}
}
// GetCurrentResourceVersionFromStorage gets the current resource version from the underlying storage engine.
// This method issues an empty list request and reads only the ResourceVersion from the object metadata
func GetCurrentResourceVersionFromStorage(ctx context.Context, storage Interface, newListFunc func() runtime.Object, resourcePrefix, objectType string) (uint64, error) {
if storage == nil {
return 0, fmt.Errorf("storage wasn't provided for %s", objectType)
}
if newListFunc == nil {
return 0, fmt.Errorf("newListFunction wasn't provided for %s", objectType)
}
emptyList := newListFunc()
pred := SelectionPredicate{
Label: labels.Everything(),
Field: fields.Everything(),
Limit: 1, // just in case we actually hit something
}
err := storage.GetList(ctx, resourcePrefix, ListOptions{Predicate: pred}, emptyList)
if err != nil {
return 0, err
}
emptyListAccessor, err := meta.ListAccessor(emptyList)
if err != nil {
return 0, err
}
if emptyListAccessor == nil {
return 0, fmt.Errorf("unable to extract a list accessor from %T", emptyList)
}
currentResourceVersion, err := strconv.Atoi(emptyListAccessor.GetResourceVersion())
if err != nil {
return 0, err
}
if currentResourceVersion == 0 {
return 0, fmt.Errorf("the current resource version must be greater than 0")
}
return uint64(currentResourceVersion), nil
}
// AnnotateInitialEventsEndBookmark adds a special annotation to the given object
// which indicates that the initial events have been sent.
//

View File

@ -0,0 +1,53 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package compatibility
import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
basecompatibility "k8s.io/component-base/compatibility"
)
// DefaultComponentGlobalsRegistry is the global var to store the effective versions and feature gates for all components for easy access.
// Example usage:
// // register the component effective version and feature gate first
// wardleEffectiveVersion := basecompatibility.NewEffectiveVersion("1.2")
// wardleFeatureGate := featuregate.NewFeatureGate()
// utilruntime.Must(compatibility.DefaultComponentGlobalsRegistry.Register(apiserver.WardleComponentName, wardleEffectiveVersion, wardleFeatureGate, false))
//
// cmd := &cobra.Command{
// ...
// // call DefaultComponentGlobalsRegistry.Set() in PersistentPreRunE to ensure the feature gates are set based on emulation version right after parsing the flags.
// PersistentPreRunE: func(*cobra.Command, []string) error {
// if err := compatibility.DefaultComponentGlobalsRegistry.Set(); err != nil {
// return err
// }
// ...
// },
// RunE: func(c *cobra.Command, args []string) error {
// // call compatibility.DefaultComponentGlobalsRegistry.Validate() somewhere
// },
// }
//
// flags := cmd.Flags()
// // add flags
// compatibility.DefaultComponentGlobalsRegistry.AddFlags(flags)
var DefaultComponentGlobalsRegistry basecompatibility.ComponentGlobalsRegistry = basecompatibility.NewComponentGlobalsRegistry()
func init() {
utilruntime.Must(DefaultComponentGlobalsRegistry.Register(basecompatibility.DefaultKubeComponent, DefaultBuildEffectiveVersion(), utilfeature.DefaultMutableFeatureGate))
}

View File

@ -0,0 +1,65 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package compatibility
import (
"k8s.io/apimachinery/pkg/util/version"
basecompatibility "k8s.io/component-base/compatibility"
baseversion "k8s.io/component-base/version"
)
// minimumKubeEmulationVersion is the first release in which the emulation version was introduced,
// so the emulation version cannot go lower than that.
var minimumKubeEmulationVersion *version.Version = version.MajorMinor(1, 31)
// DefaultBuildEffectiveVersion builds the MutableEffectiveVersion from the
// binary's build information. The same version floor is used for both the
// emulation version and the min compatibility version.
func DefaultBuildEffectiveVersion() basecompatibility.MutableEffectiveVersion {
	binaryVersion := defaultBuildBinaryVersion()
	// A 0.0 build version means the git tag is not available (local unit tests);
	// only then fall back to the hard coded kube version.
	fromBuildInfo := binaryVersion.Major() != 0 || binaryVersion.Minor() != 0
	if !fromBuildInfo {
		binaryVersion = version.MustParse(baseversion.DefaultKubeBinaryVersion)
	}
	floor := kubeEffectiveVersionFloors(binaryVersion)
	return basecompatibility.NewEffectiveVersion(binaryVersion, fromBuildInfo, floor, floor)
}
// kubeEffectiveVersionFloors returns the lowest version that both the
// emulation version and the min compatibility version may be set to:
// binaryVersion with the patch zeroed and three minors subtracted, but never
// lower than minimumKubeEmulationVersion.
func kubeEffectiveVersionFloors(binaryVersion *version.Version) *version.Version {
	if floor := binaryVersion.WithPatch(0).SubtractMinor(3); !floor.LessThan(minimumKubeEmulationVersion) {
		return floor
	}
	return minimumKubeEmulationVersion
}
// DefaultKubeEffectiveVersionForTest returns the MutableEffectiveVersion built
// from the latest K8s release hardcoded in DefaultKubeBinaryVersion.
// The hardcoded value is used because defaultBuildBinaryVersion would return 0.0
// when a test is run without a git tag.
// Both version floors are set to 0.0, so the N-3..N emulation version range is
// not enforced and tests do not automatically fail on a version bump.
// Only used in tests.
func DefaultKubeEffectiveVersionForTest() basecompatibility.MutableEffectiveVersion {
	return basecompatibility.NewEffectiveVersion(
		version.MustParse(baseversion.DefaultKubeBinaryVersion),
		false,
		version.MustParse("0.0"),
		version.MustParse("0.0"),
	)
}
// defaultBuildBinaryVersion parses the version string reported by
// baseversion.Get() into a *version.Version.
func defaultBuildBinaryVersion() *version.Version {
	return version.MustParse(baseversion.Get().String())
}

View File

@ -28,7 +28,7 @@ import (
"sync"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp" //nolint:depguard
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

View File

@ -116,4 +116,4 @@ limitations under the License.
// queues virtual start time is advanced by G. When a request
// finishes being served, and the actual service time was S, the
// queues virtual start time is decremented by G - S.
package queueset // import "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/queueset"
package queueset

View File

@ -19,15 +19,11 @@ package request
import (
"math"
"net/http"
"net/url"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/features"
"k8s.io/apiserver/pkg/storage"
etcdfeature "k8s.io/apiserver/pkg/storage/feature"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/apiserver/pkg/storage/cacher/delegator"
"k8s.io/klog/v2"
)
@ -86,8 +82,13 @@ func (e *listWorkEstimator) estimate(r *http.Request, flowSchemaName, priorityLe
return WorkEstimate{InitialSeats: e.config.MinimumSeats}
}
}
isListFromCache := requestInfo.Verb == "watch" || !shouldListFromStorage(query, &listOptions)
// TODO: Check whether watchcache is enabled.
result, err := delegator.ShouldDelegateListMeta(&listOptions, delegator.CacheWithoutSnapshots{})
if err != nil {
return WorkEstimate{InitialSeats: maxSeats}
}
listFromStorage := result.ShouldDelegate
isListFromCache := requestInfo.Verb == "watch" || !listFromStorage
numStored, err := e.countGetterFn(key(requestInfo))
switch {
@ -159,24 +160,3 @@ func key(requestInfo *apirequest.RequestInfo) string {
}
return groupResource.String()
}
// NOTICE: Keep in sync with shouldDelegateList function in
//
// staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher.go
func shouldListFromStorage(query url.Values, opts *metav1.ListOptions) bool {
resourceVersion := opts.ResourceVersion
match := opts.ResourceVersionMatch
consistentListFromCacheEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
requestWatchProgressSupported := etcdfeature.DefaultFeatureSupportChecker.Supports(storage.RequestWatchProgress)
// Serve consistent reads from storage if ConsistentListFromCache is disabled
consistentReadFromStorage := resourceVersion == "" && !(consistentListFromCacheEnabled && requestWatchProgressSupported)
// Watch cache doesn't support continuations, so serve them from etcd.
hasContinuation := len(opts.Continue) > 0
// Watch cache only supports ResourceVersionMatchNotOlderThan (default).
// see https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list
isLegacyExactMatch := opts.Limit > 0 && match == "" && len(resourceVersion) > 0 && resourceVersion != "0"
unsupportedMatch := match != "" && match != metav1.ResourceVersionMatchNotOlderThan || isLegacyExactMatch
return consistentReadFromStorage || hasContinuation || unsupportedMatch
}

View File

@ -16,4 +16,4 @@ limitations under the License.
// Package flushwriter implements a wrapper for a writer that flushes on every
// write if that writer implements the io.Flusher interface
package flushwriter // import "k8s.io/apiserver/pkg/util/flushwriter"
package flushwriter

View File

@ -50,6 +50,11 @@ func Register() {
})
}
// Reset delegates to legacyregistry.Reset.
// Only used for tests.
func Reset() {
legacyregistry.Reset()
}
// IncPeerProxiedRequest increments the # of proxied requests to peer kube-apiserver
func IncPeerProxiedRequest(ctx context.Context, status string) {
peerProxiedRequestsTotal.WithContext(ctx).WithLabelValues(status).Add(1)

88
e2e/vendor/k8s.io/apiserver/pkg/validation/metrics.go generated vendored Normal file
View File

@ -0,0 +1,88 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
// namespace and subsystem are the Namespace and Subsystem values passed to
// metrics.CounterOpts for every counter declared in this file.
const (
namespace = "apiserver" // Keep it consistent; apiserver is handling it
subsystem = "validation"
)
// ValidationMetrics is the interface for validation metrics.
// The implementation in this file is validationMetrics, exposed through the
// package-level Metrics variable.
type ValidationMetrics interface {
// IncDeclarativeValidationMismatchMetric records one declarative validation
// mismatch; the implementation in this file increments the
// declarative_validation_mismatch_total counter.
IncDeclarativeValidationMismatchMetric()
// IncDeclarativeValidationPanicMetric records one declarative validation
// panic; the implementation in this file increments the
// declarative_validation_panic_total counter.
IncDeclarativeValidationPanicMetric()
// Reset resets the validation metrics.
Reset()
}
// validationMetricsInstance is the package-level validationMetrics value. It
// owns the two counters below, which init registers with legacyregistry and
// which are exposed to callers through Metrics.
var validationMetricsInstance = &validationMetrics{
// Counter named declarative_validation_mismatch_total.
DeclarativeValidationMismatchCounter: metrics.NewCounter(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "declarative_validation_mismatch_total",
Help: "Number of times declarative validation results differed from handwritten validation results for core types.",
StabilityLevel: metrics.BETA,
},
),
// Counter named declarative_validation_panic_total.
DeclarativeValidationPanicCounter: metrics.NewCounter(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "declarative_validation_panic_total",
Help: "Number of times declarative validation has panicked during validation.",
StabilityLevel: metrics.BETA,
},
),
}
// Metrics provides access to validation metrics.
// It is initialized to validationMetricsInstance, typed as the
// ValidationMetrics interface.
var Metrics ValidationMetrics = validationMetricsInstance
// init registers both counters held by validationMetricsInstance with
// legacyregistry.
// NOTE(review): MustRegister presumably panics if registration fails, per the
// Must naming convention — confirm against component-base.
func init() {
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationMismatchCounter)
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationPanicCounter)
}
// validationMetrics is the ValidationMetrics implementation declared in this
// file; it holds one counter per metric.
type validationMetrics struct {
// DeclarativeValidationMismatchCounter is incremented by
// IncDeclarativeValidationMismatchMetric.
DeclarativeValidationMismatchCounter *metrics.Counter
// DeclarativeValidationPanicCounter is incremented by
// IncDeclarativeValidationPanicMetric.
DeclarativeValidationPanicCounter *metrics.Counter
}
// Reset resets the validation metrics by calling Reset on both the mismatch
// counter and the panic counter.
func (m *validationMetrics) Reset() {
m.DeclarativeValidationMismatchCounter.Reset()
m.DeclarativeValidationPanicCounter.Reset()
}
// IncDeclarativeValidationMismatchMetric increments the counter for the declarative_validation_mismatch_total metric.
// It is part of the ValidationMetrics interface.
func (m *validationMetrics) IncDeclarativeValidationMismatchMetric() {
m.DeclarativeValidationMismatchCounter.Inc()
}
// IncDeclarativeValidationPanicMetric increments the counter for the declarative_validation_panic_total metric.
// It is part of the ValidationMetrics interface.
func (m *validationMetrics) IncDeclarativeValidationPanicMetric() {
m.DeclarativeValidationPanicCounter.Inc()
}
// ResetValidationMetricsInstance resets the counters held by the
// package-level validationMetricsInstance by calling its Reset method.
func ResetValidationMetricsInstance() {
validationMetricsInstance.Reset()
}

Some files were not shown because too many files have changed in this diff Show More