mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-10-19 13:49:53 +00:00
2551a0b05f
Signed-off-by: Niels de Vos <ndevos@ibm.com>
963 lines
37 KiB
Go
963 lines
37 KiB
Go
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package encryptionconfig
|
|
|
|
import (
|
|
"context"
|
|
"crypto/aes"
|
|
"crypto/cipher"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
"k8s.io/apimachinery/pkg/runtime/serializer"
|
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
"k8s.io/apimachinery/pkg/util/uuid"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
apiserverconfig "k8s.io/apiserver/pkg/apis/config"
|
|
apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1"
|
|
"k8s.io/apiserver/pkg/apis/config/validation"
|
|
"k8s.io/apiserver/pkg/features"
|
|
"k8s.io/apiserver/pkg/server/healthz"
|
|
"k8s.io/apiserver/pkg/storage/value"
|
|
aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes"
|
|
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope"
|
|
envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2"
|
|
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics"
|
|
"k8s.io/apiserver/pkg/storage/value/encrypt/identity"
|
|
"k8s.io/apiserver/pkg/storage/value/encrypt/secretbox"
|
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
|
"k8s.io/klog/v2"
|
|
kmsservice "k8s.io/kms/pkg/service"
|
|
)
|
|
|
|
const (
	// Prefixes used when building the prefix transformer of each provider type.
	aesCBCTransformerPrefixV1    = "k8s:enc:aescbc:v1:"
	aesGCMTransformerPrefixV1    = "k8s:enc:aesgcm:v1:"
	secretboxTransformerPrefixV1 = "k8s:enc:secretbox:v1:"
	kmsTransformerPrefixV1       = "k8s:enc:kms:v1:"
	kmsTransformerPrefixV2       = "k8s:enc:kms:v2:"

	// The three kmsv2 values below tune how the KMS v2 plugin status poll and
	// the DEK generation logic behave. The gap between the positive poll
	// interval and the max TTL is the worst case window in which the write DEK
	// can expire because the plugin went into an error state. Dividing that
	// window by the negative poll interval gives the minimum number of times
	// the server tries to get back to a healthy state before the DEK expires
	// and writes begin to fail.
	//
	// The values are intentionally small and hardcoded for now so that a
	// "passive" storage migration can be performed while still tolerating some
	// amount of plugin downtime.
	//
	// With this approach a user can change the key ID their plugin is using
	// and then schedule a migration for 3 + N + M minutes later, where N is how
	// long the plugin needs to pick up its new config and M is extra buffer
	// for the API server to process the config. At that point the migration is
	// guaranteed to either move to the new key or to return errors.
	//
	// Were the API server to coast forever on the last DEK, the user would
	// have to actively verify that it had observed the new key ID before
	// starting a migration; otherwise the server could keep using the old DEK
	// and the storage migration would not do what the user expected.
	kmsv2PluginHealthzPositiveInterval = time.Minute
	kmsv2PluginHealthzNegativeInterval = 10 * time.Second
	kmsv2PluginWriteDEKMaxTTL          = 3 * time.Minute

	// How long a cached probe result is served after a failed (negative) or a
	// successful (positive) probe before the plugin is probed again.
	kmsPluginHealthzNegativeTTL = 3 * time.Second
	kmsPluginHealthzPositiveTTL = 20 * time.Second

	// KMS API versions accepted in the provider configuration.
	kmsAPIVersionV1 = "v1"
	kmsAPIVersionV2 = "v2"

	// kmsReloadHealthCheckName is shared by two different healthz endpoints:
	//   - the one used when one or more KMS v2 plugins and no KMS v1 plugins
	//     are in use; all v2 plugins are then probed via this single endpoint
	//   - the one used when automatic reload of the encryption config is
	//     enabled; all KMS plugins are then probed via this single endpoint,
	//     which exists (as a no-op) even when no KMS plugins are configured
	kmsReloadHealthCheckName = "kms-providers"
)
|
|
|
|
// kmsPluginHealthzResponse records the outcome of one KMS plugin health probe.
type kmsPluginHealthzResponse struct {
	// err is the error recorded for the probe; it is nil when the probe succeeded.
	err error
	// received is the time at which the outcome was recorded.
	received time.Time
}
|
|
|
|
type kmsPluginProbe struct {
|
|
name string
|
|
ttl time.Duration
|
|
service envelope.Service
|
|
lastResponse *kmsPluginHealthzResponse
|
|
l *sync.Mutex
|
|
}
|
|
|
|
type kmsv2PluginProbe struct {
|
|
state atomic.Pointer[envelopekmsv2.State]
|
|
name string
|
|
ttl time.Duration
|
|
service kmsservice.Service
|
|
lastResponse *kmsPluginHealthzResponse
|
|
l *sync.Mutex
|
|
}
|
|
|
|
// kmsHealthChecker exposes a list of health checkers as one healthz.HealthChecker.
type kmsHealthChecker []healthz.HealthChecker
|
|
|
|
func (k kmsHealthChecker) Name() string {
|
|
return kmsReloadHealthCheckName
|
|
}
|
|
|
|
func (k kmsHealthChecker) Check(req *http.Request) error {
|
|
var errs []error
|
|
|
|
for i := range k {
|
|
checker := k[i]
|
|
if err := checker.Check(req); err != nil {
|
|
errs = append(errs, fmt.Errorf("%s: %w", checker.Name(), err))
|
|
}
|
|
}
|
|
|
|
return utilerrors.Reduce(utilerrors.NewAggregate(errs))
|
|
}
|
|
|
|
func (h *kmsPluginProbe) toHealthzCheck(idx int) healthz.HealthChecker {
|
|
return healthz.NamedCheck(fmt.Sprintf("kms-provider-%d", idx), func(r *http.Request) error {
|
|
return h.check()
|
|
})
|
|
}
|
|
|
|
func (h *kmsv2PluginProbe) toHealthzCheck(idx int) healthz.HealthChecker {
|
|
return healthz.NamedCheck(fmt.Sprintf("kms-provider-%d", idx), func(r *http.Request) error {
|
|
return h.check(r.Context())
|
|
})
|
|
}
|
|
|
|
// EncryptionConfiguration represents the parsed and normalized encryption configuration for the apiserver.
|
|
type EncryptionConfiguration struct {
|
|
// Transformers is a list of value.Transformer that will be used to encrypt and decrypt data.
|
|
Transformers map[schema.GroupResource]value.Transformer
|
|
|
|
// HealthChecks is a list of healthz.HealthChecker that will be used to check the health of the encryption providers.
|
|
HealthChecks []healthz.HealthChecker
|
|
|
|
// EncryptionFileContentHash is the hash of the encryption config file.
|
|
EncryptionFileContentHash string
|
|
|
|
// KMSCloseGracePeriod is the duration we will wait before closing old transformers.
|
|
// We wait for any in-flight requests to finish by using the duration which is longer than their timeout.
|
|
KMSCloseGracePeriod time.Duration
|
|
}
|
|
|
|
// LoadEncryptionConfig parses and validates the encryption config specified by filepath.
|
|
// It may launch multiple go routines whose lifecycle is controlled by ctx.
|
|
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
|
|
// If reload is true, or KMS v2 plugins are used with no KMS v1 plugins, the returned slice of health checkers will always be of length 1.
|
|
func LoadEncryptionConfig(ctx context.Context, filepath string, reload bool) (*EncryptionConfiguration, error) {
|
|
config, contentHash, err := loadConfig(filepath, reload)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error while parsing file: %w", err)
|
|
}
|
|
|
|
transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(ctx, config)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error while building transformers: %w", err)
|
|
}
|
|
|
|
if reload || (kmsUsed.v2Used && !kmsUsed.v1Used) {
|
|
kmsHealthChecks = []healthz.HealthChecker{kmsHealthChecker(kmsHealthChecks)}
|
|
}
|
|
|
|
// KMSCloseGracePeriod is the duration we will wait before closing old transformers.
|
|
// The way we calculate is as follows:
|
|
// 1. Sum all timeouts across all KMS plugins. (check kmsPrefixTransformer for differences between v1 and v2)
|
|
// 2. Multiply that by 2 (to allow for some buffer)
|
|
// The reason we sum all timeout is because kmsHealthChecker() will run all health checks serially
|
|
return &EncryptionConfiguration{
|
|
Transformers: transformers,
|
|
HealthChecks: kmsHealthChecks,
|
|
EncryptionFileContentHash: contentHash,
|
|
KMSCloseGracePeriod: 2 * kmsUsed.kmsTimeoutSum,
|
|
}, nil
|
|
}
|
|
|
|
// getTransformerOverridesAndKMSPluginHealthzCheckers creates the set of transformers and KMS healthz checks based on the given config.
|
|
// It may launch multiple go routines whose lifecycle is controlled by ctx.
|
|
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
|
|
func getTransformerOverridesAndKMSPluginHealthzCheckers(ctx context.Context, config *apiserverconfig.EncryptionConfiguration) (map[schema.GroupResource]value.Transformer, []healthz.HealthChecker, *kmsState, error) {
|
|
var kmsHealthChecks []healthz.HealthChecker
|
|
transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(ctx, config)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
for i := range probes {
|
|
probe := probes[i]
|
|
kmsHealthChecks = append(kmsHealthChecks, probe.toHealthzCheck(i))
|
|
}
|
|
|
|
return transformers, kmsHealthChecks, kmsUsed, nil
|
|
}
|
|
|
|
type healthChecker interface {
|
|
toHealthzCheck(idx int) healthz.HealthChecker
|
|
}
|
|
|
|
// getTransformerOverridesAndKMSPluginProbes creates the set of transformers and KMS probes based on the given config.
|
|
// It may launch multiple go routines whose lifecycle is controlled by ctx.
|
|
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
|
|
func getTransformerOverridesAndKMSPluginProbes(ctx context.Context, config *apiserverconfig.EncryptionConfiguration) (map[schema.GroupResource]value.Transformer, []healthChecker, *kmsState, error) {
|
|
resourceToPrefixTransformer := map[schema.GroupResource][]value.PrefixTransformer{}
|
|
var probes []healthChecker
|
|
var kmsUsed kmsState
|
|
|
|
// For each entry in the configuration
|
|
for _, resourceConfig := range config.Resources {
|
|
resourceConfig := resourceConfig
|
|
|
|
transformers, p, used, err := prefixTransformersAndProbes(ctx, resourceConfig)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
kmsUsed.accumulate(used)
|
|
|
|
// For each resource, create a list of providers to use
|
|
for _, resource := range resourceConfig.Resources {
|
|
resource := resource
|
|
gr := schema.ParseGroupResource(resource)
|
|
|
|
// check if resource is masked by *.group rule
|
|
anyResourceInGroup := schema.GroupResource{Group: gr.Group, Resource: "*"}
|
|
if _, masked := resourceToPrefixTransformer[anyResourceInGroup]; masked {
|
|
// an earlier rule already configured a transformer for *.group, masking this rule
|
|
// return error since this is not allowed
|
|
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyResourceInGroup))
|
|
}
|
|
|
|
if _, masked := resourceToPrefixTransformer[anyGroupAnyResource]; masked {
|
|
// an earlier rule already configured a transformer for *.*, masking this rule
|
|
// return error since this is not allowed
|
|
return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyGroupAnyResource))
|
|
}
|
|
|
|
resourceToPrefixTransformer[gr] = append(resourceToPrefixTransformer[gr], transformers...)
|
|
}
|
|
|
|
probes = append(probes, p...)
|
|
}
|
|
|
|
transformers := make(map[schema.GroupResource]value.Transformer, len(resourceToPrefixTransformer))
|
|
for gr, transList := range resourceToPrefixTransformer {
|
|
gr := gr
|
|
transList := transList
|
|
transformers[gr] = value.NewPrefixTransformers(fmt.Errorf("no matching prefix found"), transList...)
|
|
}
|
|
|
|
return transformers, probes, &kmsUsed, nil
|
|
}
|
|
|
|
// check encrypts and decrypts test data against KMS-Plugin's gRPC endpoint.
|
|
func (h *kmsPluginProbe) check() error {
|
|
h.l.Lock()
|
|
defer h.l.Unlock()
|
|
|
|
if (time.Since(h.lastResponse.received)) < h.ttl {
|
|
return h.lastResponse.err
|
|
}
|
|
|
|
p, err := h.service.Encrypt([]byte("ping"))
|
|
if err != nil {
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzNegativeTTL
|
|
return fmt.Errorf("failed to perform encrypt section of the healthz check for KMS Provider %s, error: %w", h.name, err)
|
|
}
|
|
|
|
if _, err := h.service.Decrypt(p); err != nil {
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzNegativeTTL
|
|
return fmt.Errorf("failed to perform decrypt section of the healthz check for KMS Provider %s, error: %w", h.name, err)
|
|
}
|
|
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: nil, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzPositiveTTL
|
|
return nil
|
|
}
|
|
|
|
// check gets the healthz status of the KMSv2-Plugin using the Status() method.
|
|
func (h *kmsv2PluginProbe) check(ctx context.Context) error {
|
|
h.l.Lock()
|
|
defer h.l.Unlock()
|
|
|
|
if time.Since(h.lastResponse.received) < h.ttl {
|
|
return h.lastResponse.err
|
|
}
|
|
|
|
p, err := h.service.Status(ctx)
|
|
if err != nil {
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzNegativeTTL
|
|
return fmt.Errorf("failed to perform status section of the healthz check for KMS Provider %s, error: %w", h.name, err)
|
|
}
|
|
|
|
if err := h.isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx, p); err != nil {
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzNegativeTTL
|
|
return err
|
|
}
|
|
|
|
h.lastResponse = &kmsPluginHealthzResponse{err: nil, received: time.Now()}
|
|
h.ttl = kmsPluginHealthzPositiveTTL
|
|
return nil
|
|
}
|
|
|
|
// rotateDEKOnKeyIDChange tries to rotate to a new DEK if the key ID returned by Status does not match the
|
|
// current state. If a successful rotation is performed, the new DEK and keyID overwrite the existing state.
|
|
// On any failure during rotation (including mismatch between status and encrypt calls), the current state is
|
|
// preserved and will remain valid to use for encryption until its expiration (the system attempts to coast).
|
|
// If the key ID returned by Status matches the current state, the expiration of the current state is extended
|
|
// and no rotation is performed.
|
|
func (h *kmsv2PluginProbe) rotateDEKOnKeyIDChange(ctx context.Context, statusKeyID, uid string) error {
|
|
// we do not check ValidateEncryptCapability here because it is fine to re-use an old key
|
|
// that was marked as expired during an unhealthy period. As long as the key ID matches
|
|
// what we expect then there is no need to rotate here.
|
|
state, errState := h.getCurrentState()
|
|
|
|
// allow reads indefinitely in all cases
|
|
// allow writes indefinitely as long as there is no error
|
|
// allow writes for only up to kmsv2PluginWriteDEKMaxTTL from now when there are errors
|
|
// we start the timer before we make the network call because kmsv2PluginWriteDEKMaxTTL is meant to be the upper bound
|
|
expirationTimestamp := envelopekmsv2.NowFunc().Add(kmsv2PluginWriteDEKMaxTTL)
|
|
|
|
// state is valid and status keyID is unchanged from when we generated this DEK so there is no need to rotate it
|
|
// just move the expiration of the current state forward by the reuse interval
|
|
if errState == nil && state.KeyID == statusKeyID {
|
|
state.ExpirationTimestamp = expirationTimestamp
|
|
h.state.Store(&state)
|
|
return nil
|
|
}
|
|
|
|
transformer, resp, cacheKey, errGen := envelopekmsv2.GenerateTransformer(ctx, uid, h.service)
|
|
|
|
if resp == nil {
|
|
resp = &kmsservice.EncryptResponse{} // avoid nil panics
|
|
}
|
|
|
|
// happy path, should be the common case
|
|
// TODO maybe add success metrics?
|
|
if errGen == nil && resp.KeyID == statusKeyID {
|
|
h.state.Store(&envelopekmsv2.State{
|
|
Transformer: transformer,
|
|
EncryptedDEK: resp.Ciphertext,
|
|
KeyID: resp.KeyID,
|
|
Annotations: resp.Annotations,
|
|
UID: uid,
|
|
ExpirationTimestamp: expirationTimestamp,
|
|
CacheKey: cacheKey,
|
|
})
|
|
klog.V(6).InfoS("successfully rotated DEK",
|
|
"uid", uid,
|
|
"newKeyID", resp.KeyID,
|
|
"oldKeyID", state.KeyID,
|
|
"expirationTimestamp", expirationTimestamp.Format(time.RFC3339),
|
|
)
|
|
return nil
|
|
}
|
|
|
|
return fmt.Errorf("failed to rotate DEK uid=%q, errState=%v, errGen=%v, statusKeyID=%q, encryptKeyID=%q, stateKeyID=%q, expirationTimestamp=%s",
|
|
uid, errState, errGen, statusKeyID, resp.KeyID, state.KeyID, state.ExpirationTimestamp.Format(time.RFC3339))
|
|
}
|
|
|
|
// getCurrentState returns the latest state from the last status and encrypt calls.
|
|
// If the returned error is nil, the state is considered valid indefinitely for read requests.
|
|
// For write requests, the caller must also check that state.ValidateEncryptCapability does not error.
|
|
func (h *kmsv2PluginProbe) getCurrentState() (envelopekmsv2.State, error) {
|
|
state := *h.state.Load()
|
|
|
|
if state.Transformer == nil {
|
|
return envelopekmsv2.State{}, fmt.Errorf("got unexpected nil transformer")
|
|
}
|
|
|
|
if len(state.EncryptedDEK) == 0 {
|
|
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty EncryptedDEK")
|
|
}
|
|
|
|
if len(state.KeyID) == 0 {
|
|
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty keyID")
|
|
}
|
|
|
|
if state.ExpirationTimestamp.IsZero() {
|
|
return envelopekmsv2.State{}, fmt.Errorf("got unexpected zero expirationTimestamp")
|
|
}
|
|
|
|
if len(state.CacheKey) == 0 {
|
|
return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty cacheKey")
|
|
}
|
|
|
|
return state, nil
|
|
}
|
|
|
|
func (h *kmsv2PluginProbe) isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx context.Context, response *kmsservice.StatusResponse) error {
|
|
var errs []error
|
|
if response.Healthz != "ok" {
|
|
errs = append(errs, fmt.Errorf("got unexpected healthz status: %s", response.Healthz))
|
|
}
|
|
if response.Version != envelopekmsv2.KMSAPIVersion {
|
|
errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersion, response.Version))
|
|
}
|
|
|
|
if errCode, err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil {
|
|
metrics.RecordInvalidKeyIDFromStatus(h.name, string(errCode))
|
|
errs = append(errs, fmt.Errorf("got invalid KMSv2 KeyID %q: %w", response.KeyID, err))
|
|
} else {
|
|
metrics.RecordKeyIDFromStatus(h.name, response.KeyID)
|
|
// unconditionally append as we filter out nil errors below
|
|
errs = append(errs, h.rotateDEKOnKeyIDChange(ctx, response.KeyID, string(uuid.NewUUID())))
|
|
}
|
|
|
|
if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil {
|
|
return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", h.name, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// loadConfig parses the encryption configuration file at filepath and returns the parsed config and hash of the file.
|
|
func loadConfig(filepath string, reload bool) (*apiserverconfig.EncryptionConfiguration, string, error) {
|
|
f, err := os.Open(filepath)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("error opening encryption provider configuration file %q: %w", filepath, err)
|
|
}
|
|
defer f.Close()
|
|
|
|
data, err := io.ReadAll(f)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("could not read contents: %w", err)
|
|
}
|
|
if len(data) == 0 {
|
|
return nil, "", fmt.Errorf("encryption provider configuration file %q is empty", filepath)
|
|
}
|
|
|
|
scheme := runtime.NewScheme()
|
|
codecs := serializer.NewCodecFactory(scheme)
|
|
utilruntime.Must(apiserverconfig.AddToScheme(scheme))
|
|
utilruntime.Must(apiserverconfigv1.AddToScheme(scheme))
|
|
|
|
configObj, gvk, err := codecs.UniversalDecoder().Decode(data, nil, nil)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("error decoding encryption provider configuration file %q: %w", filepath, err)
|
|
}
|
|
config, ok := configObj.(*apiserverconfig.EncryptionConfiguration)
|
|
if !ok {
|
|
return nil, "", fmt.Errorf("got unexpected config type: %v", gvk)
|
|
}
|
|
|
|
return config, computeEncryptionConfigHash(data), validation.ValidateEncryptionConfiguration(config, reload).ToAggregate()
|
|
}
|
|
|
|
// prefixTransformersAndProbes creates the set of transformers and KMS probes based on the given resource config.
|
|
// It may launch multiple go routines whose lifecycle is controlled by ctx.
|
|
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
|
|
func prefixTransformersAndProbes(ctx context.Context, config apiserverconfig.ResourceConfiguration) ([]value.PrefixTransformer, []healthChecker, *kmsState, error) {
|
|
var transformers []value.PrefixTransformer
|
|
var probes []healthChecker
|
|
var kmsUsed kmsState
|
|
|
|
for _, provider := range config.Providers {
|
|
provider := provider
|
|
var (
|
|
transformer value.PrefixTransformer
|
|
transformerErr error
|
|
probe healthChecker
|
|
used *kmsState
|
|
)
|
|
|
|
switch {
|
|
case provider.AESGCM != nil:
|
|
transformer, transformerErr = aesPrefixTransformer(provider.AESGCM, aestransformer.NewGCMTransformer, aesGCMTransformerPrefixV1)
|
|
|
|
case provider.AESCBC != nil:
|
|
cbcTransformer := func(block cipher.Block) (value.Transformer, error) {
|
|
return aestransformer.NewCBCTransformer(block), nil
|
|
}
|
|
transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, cbcTransformer, aesCBCTransformerPrefixV1)
|
|
|
|
case provider.Secretbox != nil:
|
|
transformer, transformerErr = secretboxPrefixTransformer(provider.Secretbox)
|
|
|
|
case provider.KMS != nil:
|
|
transformer, probe, used, transformerErr = kmsPrefixTransformer(ctx, provider.KMS)
|
|
if transformerErr == nil {
|
|
probes = append(probes, probe)
|
|
kmsUsed.accumulate(used)
|
|
}
|
|
|
|
case provider.Identity != nil:
|
|
transformer = value.PrefixTransformer{
|
|
Transformer: identity.NewEncryptCheckTransformer(),
|
|
Prefix: []byte{},
|
|
}
|
|
|
|
default:
|
|
return nil, nil, nil, errors.New("provider does not contain any of the expected providers: KMS, AESGCM, AESCBC, Secretbox, Identity")
|
|
}
|
|
|
|
if transformerErr != nil {
|
|
return nil, nil, nil, transformerErr
|
|
}
|
|
|
|
transformers = append(transformers, transformer)
|
|
}
|
|
|
|
return transformers, probes, &kmsUsed, nil
|
|
}
|
|
|
|
// blockTransformerFunc builds a value.Transformer on top of the given cipher block.
type blockTransformerFunc func(cipher.Block) (value.Transformer, error)
|
|
|
|
func aesPrefixTransformer(config *apiserverconfig.AESConfiguration, fn blockTransformerFunc, prefix string) (value.PrefixTransformer, error) {
|
|
var result value.PrefixTransformer
|
|
|
|
if len(config.Keys) == 0 {
|
|
return result, fmt.Errorf("aes provider has no valid keys")
|
|
}
|
|
for _, key := range config.Keys {
|
|
key := key
|
|
if key.Name == "" {
|
|
return result, fmt.Errorf("key with invalid name provided")
|
|
}
|
|
if key.Secret == "" {
|
|
return result, fmt.Errorf("key %v has no provided secret", key.Name)
|
|
}
|
|
}
|
|
|
|
keyTransformers := []value.PrefixTransformer{}
|
|
|
|
for _, keyData := range config.Keys {
|
|
keyData := keyData
|
|
key, err := base64.StdEncoding.DecodeString(keyData.Secret)
|
|
if err != nil {
|
|
return result, fmt.Errorf("could not obtain secret for named key %s: %w", keyData.Name, err)
|
|
}
|
|
block, err := aes.NewCipher(key)
|
|
if err != nil {
|
|
return result, fmt.Errorf("error while creating cipher for named key %s: %w", keyData.Name, err)
|
|
}
|
|
transformer, err := fn(block)
|
|
if err != nil {
|
|
return result, fmt.Errorf("error while creating transformer for named key %s: %w", keyData.Name, err)
|
|
}
|
|
|
|
// Create a new PrefixTransformer for this key
|
|
keyTransformers = append(keyTransformers,
|
|
value.PrefixTransformer{
|
|
Transformer: transformer,
|
|
Prefix: []byte(keyData.Name + ":"),
|
|
})
|
|
}
|
|
|
|
// Create a prefixTransformer which can choose between these keys
|
|
keyTransformer := value.NewPrefixTransformers(
|
|
fmt.Errorf("no matching key was found for the provided AES transformer"), keyTransformers...)
|
|
|
|
// Create a PrefixTransformer which shall later be put in a list with other providers
|
|
result = value.PrefixTransformer{
|
|
Transformer: keyTransformer,
|
|
Prefix: []byte(prefix),
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func secretboxPrefixTransformer(config *apiserverconfig.SecretboxConfiguration) (value.PrefixTransformer, error) {
|
|
var result value.PrefixTransformer
|
|
|
|
if len(config.Keys) == 0 {
|
|
return result, fmt.Errorf("secretbox provider has no valid keys")
|
|
}
|
|
for _, key := range config.Keys {
|
|
key := key
|
|
if key.Name == "" {
|
|
return result, fmt.Errorf("key with invalid name provided")
|
|
}
|
|
if key.Secret == "" {
|
|
return result, fmt.Errorf("key %v has no provided secret", key.Name)
|
|
}
|
|
}
|
|
|
|
keyTransformers := []value.PrefixTransformer{}
|
|
|
|
for _, keyData := range config.Keys {
|
|
keyData := keyData
|
|
key, err := base64.StdEncoding.DecodeString(keyData.Secret)
|
|
if err != nil {
|
|
return result, fmt.Errorf("could not obtain secret for named key %s: %s", keyData.Name, err)
|
|
}
|
|
|
|
if len(key) != 32 {
|
|
return result, fmt.Errorf("expected key size 32 for secretbox provider, got %v", len(key))
|
|
}
|
|
|
|
keyArray := [32]byte{}
|
|
copy(keyArray[:], key)
|
|
|
|
// Create a new PrefixTransformer for this key
|
|
keyTransformers = append(keyTransformers,
|
|
value.PrefixTransformer{
|
|
Transformer: secretbox.NewSecretboxTransformer(keyArray),
|
|
Prefix: []byte(keyData.Name + ":"),
|
|
})
|
|
}
|
|
|
|
// Create a prefixTransformer which can choose between these keys
|
|
keyTransformer := value.NewPrefixTransformers(
|
|
fmt.Errorf("no matching key was found for the provided Secretbox transformer"), keyTransformers...)
|
|
|
|
// Create a PrefixTransformer which shall later be put in a list with other providers
|
|
result = value.PrefixTransformer{
|
|
Transformer: keyTransformer,
|
|
Prefix: []byte(secretboxTransformerPrefixV1),
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
var (
|
|
// The factory to create kms service. This is to make writing test easier.
|
|
envelopeServiceFactory = envelope.NewGRPCService
|
|
|
|
// The factory to create kmsv2 service. Exported for integration tests.
|
|
EnvelopeKMSv2ServiceFactory = envelopekmsv2.NewGRPCService
|
|
)
|
|
|
|
// kmsState summarizes the KMS plugins found in an encryption configuration.
type kmsState struct {
	// v1Used reports whether at least one KMS v1 plugin is configured.
	v1Used bool
	// v2Used reports whether at least one KMS v2 plugin is configured.
	v2Used bool
	// kmsTimeoutSum is the sum of the health check timeouts of all configured KMS plugins.
	kmsTimeoutSum time.Duration
}
|
|
|
|
// accumulate computes the KMS state by:
|
|
// - determining which KMS plugin versions are in use
|
|
// - calculating kmsTimeoutSum which is used as transformTracker.kmsCloseGracePeriod
|
|
// DynamicTransformers.Set waits for this period before closing old transformers after a config reload
|
|
func (s *kmsState) accumulate(other *kmsState) {
|
|
s.v1Used = s.v1Used || other.v1Used
|
|
s.v2Used = s.v2Used || other.v2Used
|
|
s.kmsTimeoutSum += other.kmsTimeoutSum
|
|
}
|
|
|
|
// kmsPrefixTransformer creates a KMS transformer and probe based on the given KMS config.
|
|
// It may launch multiple go routines whose lifecycle is controlled by ctx.
|
|
// In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched.
|
|
func kmsPrefixTransformer(ctx context.Context, config *apiserverconfig.KMSConfiguration) (value.PrefixTransformer, healthChecker, *kmsState, error) {
|
|
kmsName := config.Name
|
|
switch config.APIVersion {
|
|
case kmsAPIVersionV1:
|
|
envelopeService, err := envelopeServiceFactory(ctx, config.Endpoint, config.Timeout.Duration)
|
|
if err != nil {
|
|
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv1-Plugin's probe %q, error: %w", kmsName, err)
|
|
}
|
|
|
|
probe := &kmsPluginProbe{
|
|
name: kmsName,
|
|
ttl: kmsPluginHealthzNegativeTTL,
|
|
service: envelopeService,
|
|
l: &sync.Mutex{},
|
|
lastResponse: &kmsPluginHealthzResponse{},
|
|
}
|
|
|
|
transformer := envelopePrefixTransformer(config, envelopeService, kmsTransformerPrefixV1)
|
|
|
|
return transformer, probe, &kmsState{
|
|
v1Used: true,
|
|
// for v1 we will do encrypt and decrypt for health check. Since these are serial operations, we will double the timeout.
|
|
kmsTimeoutSum: 2 * config.Timeout.Duration,
|
|
}, nil
|
|
|
|
case kmsAPIVersionV2:
|
|
if !utilfeature.DefaultFeatureGate.Enabled(features.KMSv2) {
|
|
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2 plugin %q, KMSv2 feature is not enabled", kmsName)
|
|
}
|
|
|
|
envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Name, config.Timeout.Duration)
|
|
if err != nil {
|
|
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2-Plugin's probe %q, error: %w", kmsName, err)
|
|
}
|
|
|
|
probe := &kmsv2PluginProbe{
|
|
name: kmsName,
|
|
ttl: kmsPluginHealthzNegativeTTL,
|
|
service: envelopeService,
|
|
l: &sync.Mutex{},
|
|
lastResponse: &kmsPluginHealthzResponse{},
|
|
}
|
|
// initialize state so that Load always works
|
|
probe.state.Store(&envelopekmsv2.State{})
|
|
|
|
runProbeCheckAndLog := func(ctx context.Context) error {
|
|
if err := probe.check(ctx); err != nil {
|
|
klog.VDepth(1, 2).ErrorS(err, "kms plugin failed health check probe", "name", kmsName)
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// on the happy path where the plugin is healthy and available on server start,
|
|
// prime keyID and DEK by running the check inline once (this also prevents unit tests from flaking)
|
|
// ignore the error here since we want to support the plugin starting up async with the API server
|
|
_ = runProbeCheckAndLog(ctx)
|
|
// make sure that the plugin's key ID is reasonably up-to-date
|
|
// also, make sure that our DEK is up-to-date to with said key ID (if it expires the server will fail all writes)
|
|
// if this background loop ever stops running, the server will become unfunctional after kmsv2PluginWriteDEKMaxTTL
|
|
go wait.PollUntilWithContext(
|
|
ctx,
|
|
kmsv2PluginHealthzPositiveInterval,
|
|
func(ctx context.Context) (bool, error) {
|
|
if err := runProbeCheckAndLog(ctx); err == nil {
|
|
return false, nil
|
|
}
|
|
|
|
// TODO add integration test for quicker error poll on failure
|
|
// if we fail, block the outer polling and start a new quicker poll inline
|
|
// this limits the chance that our DEK expires during a transient failure
|
|
_ = wait.PollUntilWithContext(
|
|
ctx,
|
|
kmsv2PluginHealthzNegativeInterval,
|
|
func(ctx context.Context) (bool, error) {
|
|
return runProbeCheckAndLog(ctx) == nil, nil
|
|
},
|
|
)
|
|
|
|
return false, nil
|
|
})
|
|
|
|
// using AES-GCM by default for encrypting data with KMSv2
|
|
transformer := value.PrefixTransformer{
|
|
Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, kmsName, probe.getCurrentState),
|
|
Prefix: []byte(kmsTransformerPrefixV2 + kmsName + ":"),
|
|
}
|
|
|
|
return transformer, probe, &kmsState{
|
|
v2Used: true,
|
|
kmsTimeoutSum: config.Timeout.Duration,
|
|
}, nil
|
|
|
|
default:
|
|
return value.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMS plugin %q, unsupported KMS API version %q", kmsName, config.APIVersion)
|
|
}
|
|
}
|
|
|
|
func envelopePrefixTransformer(config *apiserverconfig.KMSConfiguration, envelopeService envelope.Service, prefix string) value.PrefixTransformer {
|
|
baseTransformerFunc := func(block cipher.Block) (value.Transformer, error) {
|
|
gcm, err := aestransformer.NewGCMTransformer(block)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// v1.24: write using AES-CBC only but support reads via AES-CBC and AES-GCM (so we can move to AES-GCM)
|
|
// v1.25: write using AES-GCM only but support reads via AES-GCM and fallback to AES-CBC for backwards compatibility
|
|
// TODO(aramase): Post v1.25: We cannot drop CBC read support until we automate storage migration.
|
|
// We could have a release note that hard requires users to perform storage migration.
|
|
return unionTransformers{gcm, aestransformer.NewCBCTransformer(block)}, nil
|
|
}
|
|
|
|
return value.PrefixTransformer{
|
|
Transformer: envelope.NewEnvelopeTransformer(envelopeService, int(*config.CacheSize), baseTransformerFunc),
|
|
Prefix: []byte(prefix + config.Name + ":"),
|
|
}
|
|
}
|
|
|
|
// unionTransformers is an ordered list of transformers: the first entry is
// used for writes, and reads try each entry in order until one succeeds.
type unionTransformers []value.Transformer
|
|
|
|
func (u unionTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) (out []byte, stale bool, err error) {
|
|
var errs []error
|
|
for i := range u {
|
|
transformer := u[i]
|
|
result, stale, err := transformer.TransformFromStorage(ctx, data, dataCtx)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
continue
|
|
}
|
|
// when i != 0, we have transformed the data from storage using the new transformer,
|
|
// we want to issue a write to etcd even if the contents of the data haven't changed
|
|
return result, stale || i != 0, nil
|
|
}
|
|
if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil {
|
|
return nil, false, err
|
|
}
|
|
return nil, false, fmt.Errorf("unionTransformers: unable to transform from storage")
|
|
}
|
|
|
|
func (u unionTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) (out []byte, err error) {
|
|
return u[0].TransformToStorage(ctx, data, dataCtx)
|
|
}
|
|
|
|
// computeEncryptionConfigHash returns the lowercase hex SHA-256 digest of an
// encryption config file that has been loaded as bytes.
// A hash is tracked instead of the raw file contents so that encryption keys
// are not held in memory outside of their associated transformers.
// The hash must only be used in-memory and never externalized from the
// process because it has no cross-release stability guarantees.
func computeEncryptionConfigHash(data []byte) string {
	digest := sha256.Sum256(data)
	return fmt.Sprintf("%x", digest)
}
|
|
|
|
var _ ResourceTransformers = &DynamicTransformers{}
|
|
var _ healthz.HealthChecker = &DynamicTransformers{}
|
|
|
|
// DynamicTransformers holds transformers that may be dynamically updated via
// a single external actor, likely a controller.
// This struct must avoid locks (even read write locks) as it is inline to all
// calls to storage.
type DynamicTransformers struct {
	// transformTracker holds the current *transformTracker; it is replaced
	// atomically as a whole by Set.
	transformTracker *atomic.Value
}
|
|
|
|
type transformTracker struct {
|
|
transformerOverrides map[schema.GroupResource]value.Transformer
|
|
kmsPluginHealthzCheck healthz.HealthChecker
|
|
closeTransformers context.CancelFunc
|
|
kmsCloseGracePeriod time.Duration
|
|
}
|
|
|
|
// NewDynamicTransformers returns transformers, health checks for kms providers and an ability to close transformers.
|
|
func NewDynamicTransformers(
|
|
transformerOverrides map[schema.GroupResource]value.Transformer,
|
|
kmsPluginHealthzCheck healthz.HealthChecker,
|
|
closeTransformers context.CancelFunc,
|
|
kmsCloseGracePeriod time.Duration,
|
|
) *DynamicTransformers {
|
|
dynamicTransformers := &DynamicTransformers{
|
|
transformTracker: &atomic.Value{},
|
|
}
|
|
|
|
tracker := &transformTracker{
|
|
transformerOverrides: transformerOverrides,
|
|
kmsPluginHealthzCheck: kmsPluginHealthzCheck,
|
|
closeTransformers: closeTransformers,
|
|
kmsCloseGracePeriod: kmsCloseGracePeriod,
|
|
}
|
|
dynamicTransformers.transformTracker.Store(tracker)
|
|
|
|
return dynamicTransformers
|
|
}
|
|
|
|
// Check implements healthz.HealthChecker
|
|
func (d *DynamicTransformers) Check(req *http.Request) error {
|
|
return d.transformTracker.Load().(*transformTracker).kmsPluginHealthzCheck.Check(req)
|
|
}
|
|
|
|
// Name implements healthz.HealthChecker
|
|
func (d *DynamicTransformers) Name() string {
|
|
return kmsReloadHealthCheckName
|
|
}
|
|
|
|
// TransformerForResource returns the transformer for the given resource.
|
|
func (d *DynamicTransformers) TransformerForResource(resource schema.GroupResource) value.Transformer {
|
|
return &resourceTransformer{
|
|
resource: resource,
|
|
transformTracker: d.transformTracker,
|
|
}
|
|
}
|
|
|
|
// Set sets the transformer overrides. This method is not go routine safe and must only be called by the same, single caller throughout the lifetime of this object.
|
|
func (d *DynamicTransformers) Set(
|
|
transformerOverrides map[schema.GroupResource]value.Transformer,
|
|
closeTransformers context.CancelFunc,
|
|
kmsPluginHealthzCheck healthz.HealthChecker,
|
|
kmsCloseGracePeriod time.Duration,
|
|
) {
|
|
// store new values
|
|
newTransformTracker := &transformTracker{
|
|
transformerOverrides: transformerOverrides,
|
|
closeTransformers: closeTransformers,
|
|
kmsPluginHealthzCheck: kmsPluginHealthzCheck,
|
|
kmsCloseGracePeriod: kmsCloseGracePeriod,
|
|
}
|
|
|
|
// update new transformer overrides
|
|
oldTransformTracker := d.transformTracker.Swap(newTransformTracker).(*transformTracker)
|
|
|
|
// close old transformers once we wait for grpc request to finish any in-flight requests.
|
|
// by the time we spawn this go routine, the new transformers have already been set and will be used for new requests.
|
|
// if the server starts shutting down during sleep duration then the transformers will correctly closed early because their lifetime is tied to the api-server drain notifier.
|
|
go func() {
|
|
time.Sleep(oldTransformTracker.kmsCloseGracePeriod)
|
|
oldTransformTracker.closeTransformers()
|
|
}()
|
|
}
|
|
|
|
// Compile-time check that *resourceTransformer implements value.Transformer.
var _ value.Transformer = (*resourceTransformer)(nil)
|
|
|
|
type resourceTransformer struct {
|
|
resource schema.GroupResource
|
|
transformTracker *atomic.Value
|
|
}
|
|
|
|
func (r *resourceTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, bool, error) {
|
|
return r.transformer().TransformFromStorage(ctx, data, dataCtx)
|
|
}
|
|
|
|
func (r *resourceTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx value.Context) ([]byte, error) {
|
|
return r.transformer().TransformToStorage(ctx, data, dataCtx)
|
|
}
|
|
|
|
func (r *resourceTransformer) transformer() value.Transformer {
|
|
return transformerFromOverrides(r.transformTracker.Load().(*transformTracker).transformerOverrides, r.resource)
|
|
}
|
|
|
|
type ResourceTransformers interface {
|
|
TransformerForResource(resource schema.GroupResource) value.Transformer
|
|
}
|
|
|
|
// Compile-time check that *StaticTransformers implements ResourceTransformers.
var _ ResourceTransformers = (*StaticTransformers)(nil)
|
|
|
|
// StaticTransformers is a fixed map from group/resource to transformer that
// implements ResourceTransformers.
type StaticTransformers map[schema.GroupResource]value.Transformer
|
|
|
|
func (s StaticTransformers) TransformerForResource(resource schema.GroupResource) value.Transformer {
|
|
return transformerFromOverrides(s, resource)
|
|
}
|
|
|
|
var anyGroupAnyResource = schema.GroupResource{
|
|
Group: "*",
|
|
Resource: "*",
|
|
}
|
|
|
|
func transformerFromOverrides(transformerOverrides map[schema.GroupResource]value.Transformer, resource schema.GroupResource) value.Transformer {
|
|
if transformer := transformerOverrides[resource]; transformer != nil {
|
|
return transformer
|
|
}
|
|
|
|
if transformer := transformerOverrides[schema.GroupResource{
|
|
Group: resource.Group,
|
|
Resource: "*",
|
|
}]; transformer != nil {
|
|
return transformer
|
|
}
|
|
|
|
if transformer := transformerOverrides[anyGroupAnyResource]; transformer != nil {
|
|
return transformer
|
|
}
|
|
|
|
return identity.NewEncryptCheckTransformer()
|
|
}
|
|
|
|
func grYAMLString(gr schema.GroupResource) string {
|
|
if gr.Group == "" && gr.Resource == "*" {
|
|
return "*."
|
|
}
|
|
|
|
return gr.String()
|
|
}
|