vendor update for CSI 0.3.0

gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6810 changed files with 438061 additions and 193861 deletions


@@ -46,6 +46,7 @@ go_library(
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/kubelet/cadvisor:go_default_library",
"//pkg/kubelet/certificate:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/config:go_default_library",
"//pkg/kubelet/configmap:go_default_library",
@@ -55,8 +56,6 @@ go_library(
"//pkg/kubelet/envvars:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/gpu:go_default_library",
"//pkg/kubelet/gpu/nvidia:go_default_library",
"//pkg/kubelet/images:go_default_library",
"//pkg/kubelet/kubeletconfig:go_default_library",
"//pkg/kubelet/kuberuntime:go_default_library",
@@ -65,7 +64,6 @@ go_library(
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/metrics/collectors:go_default_library",
"//pkg/kubelet/mountpod:go_default_library",
"//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/network/dns:go_default_library",
"//pkg/kubelet/pleg:go_default_library",
"//pkg/kubelet/pod:go_default_library",
@@ -73,7 +71,6 @@ go_library(
"//pkg/kubelet/prober:go_default_library",
"//pkg/kubelet/prober/results:go_default_library",
"//pkg/kubelet/remote:go_default_library",
"//pkg/kubelet/rkt:go_default_library",
"//pkg/kubelet/secret:go_default_library",
"//pkg/kubelet/server:go_default_library",
"//pkg/kubelet/server/portforward:go_default_library",
@@ -83,15 +80,19 @@ go_library(
"//pkg/kubelet/stats:go_default_library",
"//pkg/kubelet/status:go_default_library",
"//pkg/kubelet/sysctl:go_default_library",
"//pkg/kubelet/token:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/kubelet/util/manager:go_default_library",
"//pkg/kubelet/util/pluginwatcher:go_default_library",
"//pkg/kubelet/util/queue:go_default_library",
"//pkg/kubelet/util/sliceutils:go_default_library",
"//pkg/kubelet/volumemanager:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/security/apparmor:go_default_library",
"//pkg/security/podsecuritypolicy/sysctl:go_default_library",
"//pkg/securitycontext:go_default_library",
"//pkg/util/dbus:go_default_library",
"//pkg/util/file:go_default_library",
@@ -103,6 +104,7 @@ go_library(
"//pkg/util/removeall:go_default_library",
"//pkg/version:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/csi:go_default_library",
"//pkg/volume/util:go_default_library",
"//pkg/volume/util/types:go_default_library",
"//pkg/volume/util/volumepathhandler:go_default_library",
@@ -113,6 +115,7 @@ go_library(
"//vendor/github.com/google/cadvisor/events:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/k8s.io/api/authentication/v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@@ -133,7 +136,6 @@ go_library(
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/tools/remotecommand:go_default_library",
"//vendor/k8s.io/client-go/util/certificate:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
"//vendor/k8s.io/client-go/util/integer:go_default_library",
@@ -165,13 +167,11 @@ go_test(
}),
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/capabilities:go_default_library",
"//pkg/cloudprovider/providers/fake:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/kubelet/apis/cri/runtime/v1alpha2:go_default_library",
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/kubelet/cadvisor/testing:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/config:go_default_library",
@@ -179,12 +179,9 @@ go_test(
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/container/testing:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/gpu:go_default_library",
"//pkg/kubelet/images:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/logs:go_default_library",
"//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/network/testing:go_default_library",
"//pkg/kubelet/pleg:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/pod/testing:go_default_library",
@@ -197,14 +194,18 @@ go_test(
"//pkg/kubelet/stats:go_default_library",
"//pkg/kubelet/status:go_default_library",
"//pkg/kubelet/status/testing:go_default_library",
"//pkg/kubelet/token:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/queue:go_default_library",
"//pkg/kubelet/util/sliceutils:go_default_library",
"//pkg/kubelet/volumemanager:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/version:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/aws_ebs:go_default_library",
"//pkg/volume/azure_dd:go_default_library",
"//pkg/volume/gce_pd:go_default_library",
"//pkg/volume/host_path:go_default_library",
"//pkg/volume/testing:go_default_library",
"//pkg/volume/util:go_default_library",
@@ -254,6 +255,7 @@ filegroup(
"//pkg/kubelet/cadvisor:all-srcs",
"//pkg/kubelet/certificate:all-srcs",
"//pkg/kubelet/checkpoint:all-srcs",
"//pkg/kubelet/checkpointmanager:all-srcs",
"//pkg/kubelet/client:all-srcs",
"//pkg/kubelet/cm:all-srcs",
"//pkg/kubelet/config:all-srcs",
@@ -264,7 +266,6 @@ filegroup(
"//pkg/kubelet/envvars:all-srcs",
"//pkg/kubelet/events:all-srcs",
"//pkg/kubelet/eviction:all-srcs",
"//pkg/kubelet/gpu:all-srcs",
"//pkg/kubelet/images:all-srcs",
"//pkg/kubelet/kubeletconfig:all-srcs",
"//pkg/kubelet/kuberuntime:all-srcs",
@@ -280,12 +281,12 @@ filegroup(
"//pkg/kubelet/prober:all-srcs",
"//pkg/kubelet/qos:all-srcs",
"//pkg/kubelet/remote:all-srcs",
"//pkg/kubelet/rkt:all-srcs",
"//pkg/kubelet/secret:all-srcs",
"//pkg/kubelet/server:all-srcs",
"//pkg/kubelet/stats:all-srcs",
"//pkg/kubelet/status:all-srcs",
"//pkg/kubelet/sysctl:all-srcs",
"//pkg/kubelet/token:all-srcs",
"//pkg/kubelet/types:all-srcs",
"//pkg/kubelet/util:all-srcs",
"//pkg/kubelet/volumemanager:all-srcs",


@@ -41,6 +41,7 @@ filegroup(
"//pkg/kubelet/apis/deviceplugin/v1alpha:all-srcs",
"//pkg/kubelet/apis/deviceplugin/v1beta1:all-srcs",
"//pkg/kubelet/apis/kubeletconfig:all-srcs",
"//pkg/kubelet/apis/pluginregistration/v1alpha1:all-srcs",
"//pkg/kubelet/apis/stats/v1alpha1:all-srcs",
],
tags = ["automanaged"],

File diff suppressed because it is too large


@@ -171,7 +171,9 @@ enum MountPropagation {
message Mount {
// Path of the mount within the container.
string container_path = 1;
// Path of the mount on the host.
// Path of the mount on the host. If the host path doesn't exist, then runtimes
// should report an error. If the host path is a symbolic link, runtimes should
// follow the symlink and mount the real destination to the container.
string host_path = 2;
// If set, the mount is read-only.
bool readonly = 3;
@@ -235,6 +237,9 @@ message LinuxSandboxSecurityContext {
SELinuxOption selinux_options = 2;
// UID to run sandbox processes as, when applicable.
Int64Value run_as_user = 3;
// GID to run sandbox processes as, when applicable. run_as_group should only
// be specified when run_as_user is specified; otherwise, the runtime MUST error.
Int64Value run_as_group = 8;
// If set, the root filesystem of the sandbox is read-only.
bool readonly_rootfs = 4;
// List of groups applied to the first process run in the sandbox, in
@@ -247,7 +252,7 @@ message LinuxSandboxSecurityContext {
// privileged containers are expected to be run.
bool privileged = 6;
// Seccomp profile for the sandbox, candidate values are:
// * docker/default: the default profile for the docker container runtime
// * runtime/default: the default profile for the container runtime
// * unconfined: unconfined profile, ie, no seccomp sandboxing
// * localhost/<full-path-to-profile>: the profile installed on the node.
// <full-path-to-profile> is the full path of the profile.
@@ -302,7 +307,7 @@ message PodSandboxConfig {
// structured logs, systemd-journald journal files, gRPC trace files, etc.
// E.g.,
// PodSandboxConfig.LogDirectory = `/var/log/pods/<podUID>/`
// ContainerConfig.LogPath = `containerName_Instance#.log`
// ContainerConfig.LogPath = `containerName/Instance#.log`
//
// WARNING: Log management and how kubelet should interface with the
// container logs are under active discussion in
@@ -551,6 +556,10 @@ message LinuxContainerSecurityContext {
// UID to run the container process as. Only one of run_as_user and
// run_as_username can be specified at a time.
Int64Value run_as_user = 5;
// GID to run the container process as. run_as_group should only be specified
// when run_as_user or run_as_username is specified; otherwise, the runtime
// MUST error.
Int64Value run_as_group = 12;
// User name to run the container process as. If specified, the user MUST
// exist in the container image (i.e. in the /etc/passwd inside the image),
// and be resolved there by the runtime; otherwise, the runtime MUST error.
@@ -568,7 +577,7 @@ message LinuxContainerSecurityContext {
// http://wiki.apparmor.net/index.php/AppArmor_Core_Policy_Reference
string apparmor_profile = 9;
// Seccomp profile for the container, candidate values are:
// * docker/default: the default profile for the docker container runtime
// * runtime/default: the default profile for the container runtime
// * unconfined: unconfined profile, ie, no seccomp sandboxing
// * localhost/<full-path-to-profile>: the profile installed on the node.
// <full-path-to-profile> is the full path of the profile.
@@ -588,11 +597,21 @@ message LinuxContainerConfig {
LinuxContainerSecurityContext security_context = 2;
}
// WindowsContainerSecurityContext holds windows security configuration that will be applied to a container.
message WindowsContainerSecurityContext {
// User name to run the container process as. If specified, the user MUST
// exist in the container image and be resolved there by the runtime;
// otherwise, the runtime MUST return error.
string run_as_username = 1;
}
// WindowsContainerConfig contains platform-specific configuration for
// Windows-based containers.
message WindowsContainerConfig {
// Resources specification for the container.
WindowsContainerResources resources = 1;
// WindowsContainerSecurityContext configuration for the container.
WindowsContainerSecurityContext security_context = 2;
}
// WindowsContainerResources specifies Windows specific configuration for
@@ -677,7 +696,7 @@ message ContainerConfig {
// the log (STDOUT and STDERR) on the host.
// E.g.,
// PodSandboxConfig.LogDirectory = `/var/log/pods/<podUID>/`
// ContainerConfig.LogPath = `containerName_Instance#.log`
// ContainerConfig.LogPath = `containerName/Instance#.log`
//
// WARNING: Log management and how kubelet should interface with the
// container logs are under active discussion in
@@ -1038,7 +1057,8 @@ message RemoveImageRequest {
message RemoveImageResponse {}
message NetworkConfig {
// CIDR to use for pod IP addresses.
// CIDR to use for pod IP addresses. If the CIDR is empty, runtimes
// should omit it.
string pod_cidr = 1;
}
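The updated comment on Mount.host_path places two requirements on runtimes: fail when the host path is missing, and resolve symlinks before mounting. A minimal Go sketch of that contract, assuming a hypothetical resolveHostPath helper (illustrative only, not part of this commit):

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolveHostPath illustrates the contract: report an error when the host
// path cannot be found, and follow symlinks so the real destination is
// mounted into the container.
func resolveHostPath(hostPath string) (string, error) {
	if _, err := os.Stat(hostPath); err != nil {
		return "", fmt.Errorf("host path %q is not usable: %v", hostPath, err)
	}
	return filepath.EvalSymlinks(hostPath)
}

func main() {
	src, err := resolveHostPath("/var/log/pods")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("mount source:", src)
}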


@@ -1,5 +1,5 @@
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.


@@ -1,5 +1,5 @@
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.


@@ -17,6 +17,7 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",


@@ -63,6 +63,9 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
obj.CPUManagerPolicy = "none"
obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency
obj.QOSReserved = map[string]string{
"memory": "50%",
}
obj.OOMScoreAdj = int32(qos.KubeletOOMScoreAdj)
obj.Port = ports.KubeletPort
obj.ReadOnlyPort = ports.KubeletReadOnlyPort


@@ -147,6 +147,7 @@ var (
"CPUCFSQuota",
"CPUManagerPolicy",
"CPUManagerReconcilePeriod.Duration",
"QOSReserved[*]",
"CgroupDriver",
"CgroupRoot",
"CgroupsPerQOS",
@@ -187,6 +188,8 @@ var (
"KubeReserved[*]",
"KubeletCgroups",
"MakeIPTablesUtilChains",
"RotateCertificates",
"ServerTLSBootstrap",
"StaticPodURL",
"StaticPodURLHeader[*][*]",
"MaxOpenFiles",


@@ -46,6 +46,7 @@ func addKnownTypes(scheme *runtime.Scheme) error {
// TODO this will get cleaned up when the scheme types are fixed
scheme.AddKnownTypes(SchemeGroupVersion,
&KubeletConfiguration{},
&SerializedNodeConfigSource{},
)
return nil
}


@@ -17,6 +17,7 @@ limitations under the License.
package kubeletconfig
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -81,6 +82,17 @@ type KubeletConfiguration struct {
// TLSMinVersion is the minimum TLS version supported.
// Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).
TLSMinVersion string
// rotateCertificates enables client certificate rotation. The Kubelet will request a
// new certificate from the certificates.k8s.io API. This requires an approver to approve the
// certificate signing requests. The RotateKubeletClientCertificate feature
// must be enabled.
RotateCertificates bool
// serverTLSBootstrap enables server certificate bootstrap. Instead of self
// signing a serving certificate, the Kubelet will request a certificate from
// the certificates.k8s.io API. This requires an approver to approve the
// certificate signing requests. The RotateKubeletServerCertificate feature
// must be enabled.
ServerTLSBootstrap bool
// authentication specifies how requests to the Kubelet's server are authenticated
Authentication KubeletAuthentication
// authorization specifies how requests to the Kubelet's server are authorized
@@ -160,6 +172,9 @@ type KubeletConfiguration struct {
// CPU Manager reconciliation period.
// Requires the CPUManager feature gate to be enabled.
CPUManagerReconcilePeriod metav1.Duration
// Map of QoS resource reservation percentages (memory only for now).
// Requires the QOSReserved feature gate to be enabled.
QOSReserved map[string]string
// runtimeRequestTimeout is the timeout for all runtime requests except long running
// requests - pull, logs, exec and attach.
RuntimeRequestTimeout metav1.Duration
@@ -245,7 +260,7 @@ type KubeletConfiguration struct {
// Maximum number of container log files that can be present for a container.
ContainerLogMaxFiles int32
/* following flags are meant for Node Allocatable */
/* the following fields are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
@@ -324,3 +339,15 @@ type KubeletAnonymousAuthentication struct {
// Anonymous requests have a username of system:anonymous, and a group name of system:unauthenticated.
Enabled bool
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// SerializedNodeConfigSource allows us to serialize NodeConfigSource
// This type is used internally by the Kubelet for tracking checkpointed dynamic configs.
// It exists in the kubeletconfig API group because it is classified as a versioned input to the Kubelet.
type SerializedNodeConfigSource struct {
metav1.TypeMeta
// Source is the source that we are serializing
// +optional
Source v1.NodeConfigSource
}
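The point of this wrapper is that a checkpointed object needs TypeMeta to be decodable later. A simplified, self-contained sketch using local stand-in types (not the real apimachinery or k8s.io/api/core/v1 packages):

package main

import (
	"encoding/json"
	"fmt"
)

// Local stand-ins, simplified for illustration only.
type TypeMeta struct {
	Kind       string `json:"kind,omitempty"`
	APIVersion string `json:"apiVersion,omitempty"`
}

type ConfigMapNodeConfigSource struct {
	Namespace string `json:"namespace"`
	Name      string `json:"name"`
}

type NodeConfigSource struct {
	ConfigMap *ConfigMapNodeConfigSource `json:"configMap,omitempty"`
}

type SerializedNodeConfigSource struct {
	TypeMeta // embedded, so kind/apiVersion are inlined in the JSON
	Source   NodeConfigSource `json:"source,omitempty"`
}

func main() {
	cp := SerializedNodeConfigSource{
		TypeMeta: TypeMeta{Kind: "SerializedNodeConfigSource", APIVersion: "kubelet.config.k8s.io/v1beta1"},
		Source: NodeConfigSource{
			ConfigMap: &ConfigMapNodeConfigSource{Namespace: "kube-system", Name: "node-config"},
		},
	}
	out, _ := json.Marshal(cp)
	// The kind/apiVersion fields make the checkpoint self-describing.
	fmt.Println(string(out))
}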


@@ -23,6 +23,7 @@ go_library(
"//pkg/kubelet/types:go_default_library",
"//pkg/master/ports:go_default_library",
"//pkg/util/pointer:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/conversion:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",


@@ -45,6 +45,7 @@ func init() {
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&KubeletConfiguration{},
&SerializedNodeConfigSource{},
)
return nil
}


@@ -17,6 +17,7 @@ limitations under the License.
package v1beta1
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -46,68 +47,124 @@ type KubeletConfiguration struct {
// staticPodPath is the path to the directory containing local (static) pods to
// run, or the path to a single static pod file.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// the set of static pods specified at the new path may be different than the
// ones the Kubelet initially started with, and this may disrupt your node.
// Default: ""
// +optional
StaticPodPath string `json:"staticPodPath,omitempty"`
// syncFrequency is the max period between synchronizing running
// containers and config.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// shortening this duration may have a negative performance impact, especially
// as the number of Pods on the node increases. Alternatively, increasing this
// duration will result in longer refresh times for ConfigMaps and Secrets.
// Default: "1m"
// +optional
SyncFrequency metav1.Duration `json:"syncFrequency,omitempty"`
// fileCheckFrequency is the duration between checking config files for
// new data
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// shortening the duration will cause the Kubelet to reload local Static Pod
// configurations more frequently, which may have a negative performance impact.
// Default: "20s"
// +optional
FileCheckFrequency metav1.Duration `json:"fileCheckFrequency,omitempty"`
// httpCheckFrequency is the duration between checking http for new data
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// shortening the duration will cause the Kubelet to poll staticPodURL more
// frequently, which may have a negative performance impact.
// Default: "20s"
// +optional
HTTPCheckFrequency metav1.Duration `json:"httpCheckFrequency,omitempty"`
// staticPodURL is the URL for accessing static pods to run
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// the set of static pods specified at the new URL may be different than the
// ones the Kubelet initially started with, and this may disrupt your node.
// Default: ""
// +optional
StaticPodURL string `json:"staticPodURL,omitempty"`
// staticPodURLHeader is a map of slices with HTTP headers to use when accessing the podURL
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt the ability to read the latest set of static pods from StaticPodURL.
// Default: nil
// +optional
StaticPodURLHeader map[string][]string `json:"staticPodURLHeader,omitempty"`
// address is the IP address for the Kubelet to serve on (set to 0.0.0.0
// for all interfaces).
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: "0.0.0.0"
// +optional
Address string `json:"address,omitempty"`
// port is the port for the Kubelet to serve on.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: 10250
// +optional
Port int32 `json:"port,omitempty"`
// readOnlyPort is the read-only port for the Kubelet to serve on with
// no authentication/authorization.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: 0 (disabled)
// +optional
ReadOnlyPort int32 `json:"readOnlyPort,omitempty"`
// tlsCertFile is the file containing x509 Certificate for HTTPS. (CA cert,
// tlsCertFile is the file containing x509 Certificate for HTTPS. (CA cert,
// if any, concatenated after server cert). If tlsCertFile and
// tlsPrivateKeyFile are not provided, a self-signed certificate
// and key are generated for the public address and saved to the directory
// passed to the Kubelet's --cert-dir flag.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: ""
// +optional
TLSCertFile string `json:"tlsCertFile,omitempty"`
// tlsPrivateKeyFile is the file containing x509 private key matching tlsCertFile
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: ""
// +optional
TLSPrivateKeyFile string `json:"tlsPrivateKeyFile,omitempty"`
// TLSCipherSuites is the list of allowed cipher suites for the server.
// Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: nil
// +optional
TLSCipherSuites []string `json:"tlsCipherSuites,omitempty"`
// TLSMinVersion is the minimum TLS version supported.
// Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: ""
// +optional
TLSMinVersion string `json:"tlsMinVersion,omitempty"`
// rotateCertificates enables client certificate rotation. The Kubelet will request a
// new certificate from the certificates.k8s.io API. This requires an approver to approve the
// certificate signing requests. The RotateKubeletClientCertificate feature
// must be enabled.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// disabling it may disrupt the Kubelet's ability to authenticate with the API server
// after the current certificate expires.
// Default: false
// +optional
RotateCertificates bool `json:"rotateCertificates,omitempty"`
// serverTLSBootstrap enables server certificate bootstrap. Instead of self
// signing a serving certificate, the Kubelet will request a certificate from
// the certificates.k8s.io API. This requires an approver to approve the
// certificate signing requests. The RotateKubeletServerCertificate feature
// must be enabled.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// disabling it will stop the renewal of Kubelet server certificates, which can
// disrupt components that interact with the Kubelet server in the long term,
// due to certificate expiration.
// Default: false
// +optional
ServerTLSBootstrap bool `json:"serverTLSBootstrap,omitempty"`
// authentication specifies how requests to the Kubelet's server are authenticated
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Defaults:
// anonymous:
// enabled: false
@@ -117,6 +174,8 @@ type KubeletConfiguration struct {
// +optional
Authentication KubeletAuthentication `json:"authentication"`
// authorization specifies how requests to the Kubelet's server are authorized
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Defaults:
// mode: Webhook
// webhook:
@@ -126,131 +185,202 @@ type KubeletConfiguration struct {
Authorization KubeletAuthorization `json:"authorization"`
// registryPullQPS is the limit of registry pulls per second.
// Set to 0 for no limit.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic produced
// by image pulls.
// Default: 5
// +optional
RegistryPullQPS *int32 `json:"registryPullQPS,omitempty"`
// registryBurst is the maximum size of bursty pulls, temporarily allows
// pulls to burst to this number, while still not exceeding registryPullQPS.
// Only used if registryPullQPS > 0.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic produced
// by image pulls.
// Default: 10
// +optional
RegistryBurst int32 `json:"registryBurst,omitempty"`
// eventRecordQPS is the maximum event creations per second. If 0, there
// is no limit enforced.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic produced by
// event creations.
// Default: 5
// +optional
EventRecordQPS *int32 `json:"eventRecordQPS,omitempty"`
// eventBurst is the maximum size of a burst of event creations, temporarily
// allows event creations to burst to this number, while still not exceeding
// eventRecordQPS. Only used if eventRecordQPS > 0.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic produced by
// event creations.
// Default: 10
// +optional
EventBurst int32 `json:"eventBurst,omitempty"`
// enableDebuggingHandlers enables server endpoints for log collection
// and local running of containers and commands
// enableDebuggingHandlers enables server endpoints for log access
// and local running of containers and commands, including the exec,
// attach, logs, and portforward features.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// disabling it may disrupt components that interact with the Kubelet server.
// Default: true
// +optional
EnableDebuggingHandlers *bool `json:"enableDebuggingHandlers,omitempty"`
// enableContentionProfiling enables lock contention profiling, if enableDebuggingHandlers is true.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// enabling it may carry a performance impact.
// Default: false
// +optional
EnableContentionProfiling bool `json:"enableContentionProfiling,omitempty"`
// healthzPort is the port of the localhost healthz endpoint (set to 0 to disable)
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that monitor Kubelet health.
// Default: 10248
// +optional
HealthzPort *int32 `json:"healthzPort,omitempty"`
// healthzBindAddress is the IP address for the healthz server to serve on
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that monitor Kubelet health.
// Default: "127.0.0.1"
// +optional
HealthzBindAddress string `json:"healthzBindAddress,omitempty"`
// oomScoreAdj is the oom-score-adj value for the kubelet process. Values
// must be within the range [-1000, 1000].
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the stability of nodes under memory pressure.
// Default: -999
// +optional
OOMScoreAdj *int32 `json:"oomScoreAdj,omitempty"`
// clusterDomain is the DNS domain for this cluster. If set, kubelet will
// configure all containers to search this domain in addition to the
// host's search domains.
// Dynamic Kubelet Config (beta): Dynamically updating this field is not recommended,
// as it should be kept in sync with the rest of the cluster.
// Default: ""
// +optional
ClusterDomain string `json:"clusterDomain,omitempty"`
// clusterDNS is a list of IP addresses for the cluster DNS server. If set,
// kubelet will configure all containers to use this for DNS resolution
// instead of the host's DNS servers.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// changes will only take effect on Pods created after the update. Draining
// the node is recommended before changing this field.
// Default: nil
// +optional
ClusterDNS []string `json:"clusterDNS,omitempty"`
// streamingConnectionIdleTimeout is the maximum time a streaming connection
// can be idle before the connection is automatically closed.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact components that rely on infrequent updates over streaming
// connections to the Kubelet server.
// Default: "4h"
// +optional
StreamingConnectionIdleTimeout metav1.Duration `json:"streamingConnectionIdleTimeout,omitempty"`
// nodeStatusUpdateFrequency is the frequency that kubelet posts node
// status to the master. Note: be cautious when changing this value; it
// must work with nodeMonitorGracePeriod in nodecontroller.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact node scalability, and also that the node controller's
// nodeMonitorGracePeriod must be set to N*NodeStatusUpdateFrequency,
// where N is the number of retries before the node controller marks
// the node unhealthy.
// Default: "10s"
// +optional
NodeStatusUpdateFrequency metav1.Duration `json:"nodeStatusUpdateFrequency,omitempty"`
// imageMinimumGCAge is the minimum age for an unused image before it is
// garbage collected.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay garbage collection, and may change the image overhead
// on the node.
// Default: "2m"
// +optional
ImageMinimumGCAge metav1.Duration `json:"imageMinimumGCAge,omitempty"`
// imageGCHighThresholdPercent is the percent of disk usage after which
// image garbage collection is always run. The percent is calculated as
// this field value out of 100.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay garbage collection, and may change the image overhead
// on the node.
// Default: 85
// +optional
ImageGCHighThresholdPercent *int32 `json:"imageGCHighThresholdPercent,omitempty"`
// imageGCLowThresholdPercent is the percent of disk usage before which
// image garbage collection is never run. Lowest disk usage to garbage
// collect to. The percent is calculated as this field value out of 100.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay garbage collection, and may change the image overhead
// on the node.
// Default: 80
// +optional
ImageGCLowThresholdPercent *int32 `json:"imageGCLowThresholdPercent,omitempty"`
// How frequently to calculate and cache volume disk usage for all pods
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// shortening the period may carry a performance impact.
// Default: "1m"
// +optional
VolumeStatsAggPeriod metav1.Duration `json:"volumeStatsAggPeriod,omitempty"`
// kubeletCgroups is the absolute name of cgroups to isolate the kubelet in
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: ""
// +optional
KubeletCgroups string `json:"kubeletCgroups,omitempty"`
// systemCgroups is the absolute name of cgroups in which to place
// all non-kernel processes that are not already in a container. Empty
// for no container. Rolling back the flag requires a reboot.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: ""
// +optional
SystemCgroups string `json:"systemCgroups,omitempty"`
// cgroupRoot is the root cgroup to use for pods. This is handled by the
// container runtime on a best effort basis.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: ""
// +optional
CgroupRoot string `json:"cgroupRoot,omitempty"`
// Enable QoS based Cgroup hierarchy: top level cgroups for QoS classes,
// and all Burstable and BestEffort pods are brought up under their
// specific top level QoS cgroup.
// +optional
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: true
// +optional
CgroupsPerQOS *bool `json:"cgroupsPerQOS,omitempty"`
// driver that the kubelet uses to manipulate cgroups on the host (cgroupfs or systemd)
// +optional
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: "cgroupfs"
// +optional
CgroupDriver string `json:"cgroupDriver,omitempty"`
// CPUManagerPolicy is the name of the policy to use.
// Requires the CPUManager feature gate to be enabled.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: "none"
// +optional
CPUManagerPolicy string `json:"cpuManagerPolicy,omitempty"`
// CPU Manager reconciliation period.
// Requires the CPUManager feature gate to be enabled.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// shortening the period may carry a performance impact.
// Default: "10s"
// +optional
CPUManagerReconcilePeriod metav1.Duration `json:"cpuManagerReconcilePeriod,omitempty"`
// qosReserved is a set of resource name to percentage pairs that specify
// the minimum percentage of a resource reserved for exclusive use by the
// guaranteed QoS tier.
// Currently supported resources: "memory"
// Requires the QOSReserved feature gate to be enabled.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: nil
// +optional
QOSReserved map[string]string `json:"qosReserved,omitempty"`
// runtimeRequestTimeout is the timeout for all runtime requests except long running
// requests - pull, logs, exec and attach.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may disrupt components that interact with the Kubelet server.
// Default: "2m"
// +optional
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout,omitempty"`
@@ -263,46 +393,74 @@ type KubeletConfiguration struct {
// "none": do nothing.
// Generally, one must set --hairpin-mode=hairpin-veth to achieve hairpin NAT,
// because promiscuous-bridge assumes the existence of a container bridge named cbr0.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may require a node reboot, depending on the network plugin.
// Default: "promiscuous-bridge"
// +optional
HairpinMode string `json:"hairpinMode,omitempty"`
// maxPods is the number of pods that can run on this Kubelet.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// changes may cause Pods to fail admission on Kubelet restart, and may change
// the value reported in Node.Status.Capacity[v1.ResourcePods], thus affecting
// future scheduling decisions. Increasing this value may also decrease performance,
// as more Pods can be packed into a single node.
// Default: 110
// +optional
MaxPods int32 `json:"maxPods,omitempty"`
// The CIDR to use for pod IP addresses, only used in standalone mode.
// In cluster mode, this is obtained from the master.
// Dynamic Kubelet Config (beta): This field should always be set to the empty default.
// It should only be set for standalone Kubelets, which cannot use Dynamic Kubelet Config.
// Default: ""
// +optional
PodCIDR string `json:"podCIDR,omitempty"`
// PodPidsLimit is the maximum number of pids in any pod.
// Requires the SupportPodPidsLimit feature gate to be enabled.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// lowering it may prevent container processes from forking after the change.
// Default: -1
// +optional
PodPidsLimit *int64 `json:"podPidsLimit,omitempty"`
// ResolverConfig is the resolver configuration file used as the basis
// for the container DNS resolution configuration.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// changes will only take effect on Pods created after the update. Draining
// the node is recommended before changing this field.
// Default: "/etc/resolv.conf"
// +optional
ResolverConfig string `json:"resolvConf,omitempty"`
// cpuCFSQuota enables CPU CFS quota enforcement for containers that
// specify CPU limits.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// disabling it may reduce node stability.
// Default: true
// +optional
CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"`
// maxOpenFiles is the number of files that can be opened by the Kubelet process.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the ability of the Kubelet to interact with the node's filesystem.
// Default: 1000000
// +optional
MaxOpenFiles int64 `json:"maxOpenFiles,omitempty"`
// contentType is the content type of requests sent to the apiserver.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the ability for the Kubelet to communicate with the API server.
// If the Kubelet loses contact with the API server due to a change to this field,
// the change cannot be reverted via dynamic Kubelet config.
// Default: "application/vnd.kubernetes.protobuf"
// +optional
ContentType string `json:"contentType,omitempty"`
// kubeAPIQPS is the QPS to use while talking with kubernetes apiserver
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic the Kubelet
// sends to the API server.
// Default: 5
// +optional
KubeAPIQPS *int32 `json:"kubeAPIQPS,omitempty"`
// kubeAPIBurst is the burst to allow while talking with kubernetes apiserver
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact scalability by changing the amount of traffic the Kubelet
// sends to the API server.
// Default: 10
// +optional
KubeAPIBurst int32 `json:"kubeAPIBurst,omitempty"`
@@ -310,11 +468,15 @@ type KubeletConfiguration struct {
// at a time. We recommend *not* changing the default value on nodes that
// run docker daemon with version < 1.9 or an Aufs storage backend.
// Issue #10959 has more details.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the performance of image pulls.
// Default: true
// +optional
SerializeImagePulls *bool `json:"serializeImagePulls,omitempty"`
// Map of signal names to quantities that defines hard eviction thresholds. For example: {"memory.available": "300Mi"}.
// To explicitly disable, pass a 0% or 100% threshold on an arbitrary resource.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay Pod evictions.
// Default:
// memory.available: "100Mi"
// nodefs.available: "10%"
@@ -322,49 +484,83 @@ type KubeletConfiguration struct {
// imagefs.available: "15%"
// +optional
EvictionHard map[string]string `json:"evictionHard,omitempty"`
// Map of signal names to quantities that defines soft eviction thresholds. For example: {"memory.available": "300Mi"}.
// Map of signal names to quantities that defines soft eviction thresholds.
// For example: {"memory.available": "300Mi"}.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay Pod evictions, and may change the allocatable reported
// by the node.
// Default: nil
// +optional
EvictionSoft map[string]string `json:"evictionSoft,omitempty"`
// Map of signal names to quantities that defines grace periods for each soft eviction signal. For example: {"memory.available": "30s"}.
// Map of signal names to quantities that defines grace periods for each soft eviction signal.
// For example: {"memory.available": "30s"}.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger or delay Pod evictions.
// Default: nil
// +optional
EvictionSoftGracePeriod map[string]string `json:"evictionSoftGracePeriod,omitempty"`
// Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// lowering it may decrease the stability of the node when the node is overcommitted.
// Default: "5m"
// +optional
EvictionPressureTransitionPeriod metav1.Duration `json:"evictionPressureTransitionPeriod,omitempty"`
// Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
// Maximum allowed grace period (in seconds) to use when terminating pods in
// response to a soft eviction threshold being met. This value effectively caps
// the Pod's TerminationGracePeriodSeconds value during soft evictions.
// Note: Due to issue #64530, the behavior has a bug where this value currently just
// overrides the grace period during soft eviction, which can increase the grace
// period from what is set on the Pod. This bug will be fixed in a future release.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// lowering it decreases the amount of time Pods will have to gracefully clean
// up before being killed during a soft eviction.
// Default: 0
// +optional
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod,omitempty"`
// Map of signal names to quantities that defines minimum reclaims, which describe the minimum
// amount of a given resource the kubelet will reclaim when performing a pod eviction while
// that resource is under pressure. For example: {"imagefs.available": "2Gi"}
// +optional
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may change how well eviction can manage resource pressure.
// Default: nil
// +optional
EvictionMinimumReclaim map[string]string `json:"evictionMinimumReclaim,omitempty"`
// podsPerCore is the maximum number of pods per core. Cannot exceed MaxPods.
// If 0, this field is ignored.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// changes may cause Pods to fail admission on Kubelet restart, and may change
// the value reported in Node.Status.Capacity[v1.ResourcePods], thus affecting
// future scheduling decisions. Increasing this value may also decrease performance,
// as more Pods can be packed into a single node.
// Default: 0
// +optional
PodsPerCore int32 `json:"podsPerCore,omitempty"`
// enableControllerAttachDetach enables the Attach/Detach controller to
// manage attachment/detachment of volumes scheduled to this node, and
// disables kubelet from executing any attach/detach operations
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// changing which component is responsible for volume management on a live node
// may result in volumes refusing to detach if the node is not drained prior to
// the update, and if Pods are scheduled to the node before the
// volumes.kubernetes.io/controller-managed-attach-detach annotation is updated by the
// Kubelet. In general, it is safest to leave this value set the same as local config.
// Default: true
// +optional
EnableControllerAttachDetach *bool `json:"enableControllerAttachDetach,omitempty"`
// protectKernelDefaults, if true, causes the Kubelet to error if kernel
// flags are not as it expects. Otherwise the Kubelet will attempt to modify
// kernel flags to match its expectation.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// enabling it may cause the Kubelet to crash-loop if the Kernel is not configured as
// Kubelet expects.
// Default: false
// +optional
ProtectKernelDefaults bool `json:"protectKernelDefaults,omitempty"`
// If true, Kubelet ensures a set of iptables rules are present on host.
// These rules will serve as utility rules for various components, e.g. KubeProxy.
// The rules will be created based on IPTablesMasqueradeBit and IPTablesDropBit.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// disabling it will prevent the Kubelet from healing locally misconfigured iptables rules.
// Default: true
// +optional
MakeIPTablesUtilChains *bool `json:"makeIPTablesUtilChains,omitempty"`
@@ -372,39 +568,61 @@ type KubeletConfiguration struct {
// Values must be within the range [0, 31]. Must be different from other mark bits.
// Warning: Please match the value of the corresponding parameter in kube-proxy.
// TODO: clean up IPTablesMasqueradeBit in kube-proxy
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it needs to be coordinated with other components, like kube-proxy, and the update
// will only be effective if MakeIPTablesUtilChains is enabled.
// Default: 14
// +optional
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit,omitempty"`
// iptablesDropBit is the bit of the iptables fwmark space to mark for dropping packets.
// Values must be within the range [0, 31]. Must be different from other mark bits.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it needs to be coordinated with other components, like kube-proxy, and the update
// will only be effective if MakeIPTablesUtilChains is enabled.
// Default: 15
// +optional
IPTablesDropBit *int32 `json:"iptablesDropBit,omitempty"`
// featureGates is a map of feature names to bools that enable or disable alpha/experimental
// features. This field modifies piecemeal the built-in default values from
// "k8s.io/kubernetes/pkg/features/kube_features.go".
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider the
// documentation for the features you are enabling or disabling. While we
// encourage feature developers to make it possible to dynamically enable
// and disable features, some changes may require node reboots, and some
// features may require careful coordination to retroactively disable.
// Default: nil
// +optional
FeatureGates map[string]bool `json:"featureGates,omitempty"`
// failSwapOn tells the Kubelet to fail to start if swap is enabled on the node.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// setting it to true will cause the Kubelet to crash-loop if swap is enabled.
// Default: true
// +optional
FailSwapOn *bool `json:"failSwapOn,omitempty"`
// A quantity defines the maximum size of the container log file before it is rotated. For example: "5Mi" or "256Ki".
// A quantity defines the maximum size of the container log file before it is rotated.
// For example: "5Mi" or "256Ki".
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may trigger log rotation.
// Default: "10Mi"
// +optional
ContainerLogMaxSize string `json:"containerLogMaxSize,omitempty"`
// Maximum number of container log files that can be present for a container.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// lowering it may cause log files to be deleted.
// Default: 5
// +optional
ContainerLogMaxFiles *int32 `json:"containerLogMaxFiles,omitempty"`
/* following flags are meant for Node Allocatable */
/* the following fields are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// systemReserved is a set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G)
// pairs that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported.
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may not be possible to increase the reserved resources, because this
// requires resizing cgroups. Always look for a NodeAllocatableEnforced event
// after updating this field to ensure that the update was successful.
// Default: nil
// +optional
SystemReserved map[string]string `json:"systemReserved,omitempty"`
@@ -412,16 +630,24 @@ type KubeletConfiguration struct {
// that describe resources reserved for kubernetes system components.
// Currently cpu, memory and local storage for root file system are supported.
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may not be possible to increase the reserved resources, because this
// requires resizing cgroups. Always look for a NodeAllocatableEnforced event
// after updating this field to ensure that the update was successful.
// Default: nil
// +optional
KubeReserved map[string]string `json:"kubeReserved,omitempty"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
// Refer to [Node Allocatable](https://git.k8s.io/community/contributors/design-proposals/node/node-allocatable.md) doc for more information.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: ""
// +optional
SystemReservedCgroup string `json:"systemReservedCgroup,omitempty"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
// Refer to [Node Allocatable](https://git.k8s.io/community/contributors/design-proposals/node/node-allocatable.md) doc for more information.
// Dynamic Kubelet Config (beta): This field should not be updated without a full node
// reboot. It is safest to keep this value the same as the local config.
// Default: ""
// +optional
KubeReservedCgroup string `json:"kubeReservedCgroup,omitempty"`
@@ -429,6 +655,12 @@ type KubeletConfiguration struct {
// This flag accepts a list of options. Acceptable options are `none`, `pods`, `system-reserved` & `kube-reserved`.
// If `none` is specified, no other options may be specified.
// Refer to [Node Allocatable](https://git.k8s.io/community/contributors/design-proposals/node/node-allocatable.md) doc for more information.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// removing enforcements may reduce the stability of the node. Alternatively, adding
// enforcements may reduce the stability of components which were using more than
// the reserved amount of resources; for example, enforcing kube-reserved may cause
// Kubelets to OOM if it uses more than the reserved resources, and enforcing system-reserved
// may cause system daemons to OOM if they use more than the reserved resources.
// Default: ["pods"]
// +optional
EnforceNodeAllocatable []string `json:"enforceNodeAllocatable,omitempty"`
@@ -500,3 +732,15 @@ type KubeletAnonymousAuthentication struct {
// +optional
Enabled *bool `json:"enabled,omitempty"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// SerializedNodeConfigSource allows us to serialize v1.NodeConfigSource.
// This type is used internally by the Kubelet for tracking checkpointed dynamic configs.
// It exists in the kubeletconfig API group because it is classified as a versioned input to the Kubelet.
type SerializedNodeConfigSource struct {
metav1.TypeMeta `json:",inline"`
// Source is the source that we are serializing
// +optional
Source v1.NodeConfigSource `json:"source,omitempty" protobuf:"bytes,1,opt,name=source"`
}
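Taken together, the new v1beta1 fields can be set like any other KubeletConfiguration field. A hedged in-tree sketch (import path as in this commit; untouched fields keep their defaults, which are applied by the defaulting code elsewhere):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1"
)

func main() {
	cfg := v1beta1.KubeletConfiguration{
		// Fields added in this commit; each is gated by the feature
		// named in its doc comment above.
		RotateCertificates: true,
		ServerTLSBootstrap: true,
		QOSReserved:        map[string]string{"memory": "50%"},
	}
	fmt.Printf("%+v\n", cfg)
}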


@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -51,6 +51,8 @@ func RegisterConversions(scheme *runtime.Scheme) error {
Convert_kubeletconfig_KubeletWebhookAuthorization_To_v1beta1_KubeletWebhookAuthorization,
Convert_v1beta1_KubeletX509Authentication_To_kubeletconfig_KubeletX509Authentication,
Convert_kubeletconfig_KubeletX509Authentication_To_v1beta1_KubeletX509Authentication,
Convert_v1beta1_SerializedNodeConfigSource_To_kubeletconfig_SerializedNodeConfigSource,
Convert_kubeletconfig_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource,
)
}
@@ -154,6 +156,8 @@ func autoConvert_v1beta1_KubeletConfiguration_To_kubeletconfig_KubeletConfigurat
out.TLSPrivateKeyFile = in.TLSPrivateKeyFile
out.TLSCipherSuites = *(*[]string)(unsafe.Pointer(&in.TLSCipherSuites))
out.TLSMinVersion = in.TLSMinVersion
out.RotateCertificates = in.RotateCertificates
out.ServerTLSBootstrap = in.ServerTLSBootstrap
if err := Convert_v1beta1_KubeletAuthentication_To_kubeletconfig_KubeletAuthentication(&in.Authentication, &out.Authentication, s); err != nil {
return err
}
@@ -200,6 +204,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_kubeletconfig_KubeletConfigurat
out.CgroupDriver = in.CgroupDriver
out.CPUManagerPolicy = in.CPUManagerPolicy
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved))
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.HairpinMode = in.HairpinMode
out.MaxPods = in.MaxPods
@@ -275,6 +280,8 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1beta1_KubeletConfigurat
out.TLSPrivateKeyFile = in.TLSPrivateKeyFile
out.TLSCipherSuites = *(*[]string)(unsafe.Pointer(&in.TLSCipherSuites))
out.TLSMinVersion = in.TLSMinVersion
out.RotateCertificates = in.RotateCertificates
out.ServerTLSBootstrap = in.ServerTLSBootstrap
if err := Convert_kubeletconfig_KubeletAuthentication_To_v1beta1_KubeletAuthentication(&in.Authentication, &out.Authentication, s); err != nil {
return err
}
@@ -321,6 +328,7 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1beta1_KubeletConfigurat
out.CgroupDriver = in.CgroupDriver
out.CPUManagerPolicy = in.CPUManagerPolicy
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved))
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.HairpinMode = in.HairpinMode
out.MaxPods = in.MaxPods
@@ -449,3 +457,23 @@ func autoConvert_kubeletconfig_KubeletX509Authentication_To_v1beta1_KubeletX509A
func Convert_kubeletconfig_KubeletX509Authentication_To_v1beta1_KubeletX509Authentication(in *kubeletconfig.KubeletX509Authentication, out *KubeletX509Authentication, s conversion.Scope) error {
return autoConvert_kubeletconfig_KubeletX509Authentication_To_v1beta1_KubeletX509Authentication(in, out, s)
}
func autoConvert_v1beta1_SerializedNodeConfigSource_To_kubeletconfig_SerializedNodeConfigSource(in *SerializedNodeConfigSource, out *kubeletconfig.SerializedNodeConfigSource, s conversion.Scope) error {
out.Source = in.Source
return nil
}
// Convert_v1beta1_SerializedNodeConfigSource_To_kubeletconfig_SerializedNodeConfigSource is an autogenerated conversion function.
func Convert_v1beta1_SerializedNodeConfigSource_To_kubeletconfig_SerializedNodeConfigSource(in *SerializedNodeConfigSource, out *kubeletconfig.SerializedNodeConfigSource, s conversion.Scope) error {
return autoConvert_v1beta1_SerializedNodeConfigSource_To_kubeletconfig_SerializedNodeConfigSource(in, out, s)
}
func autoConvert_kubeletconfig_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource(in *kubeletconfig.SerializedNodeConfigSource, out *SerializedNodeConfigSource, s conversion.Scope) error {
out.Source = in.Source
return nil
}
// Convert_kubeletconfig_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource is an autogenerated conversion function.
func Convert_kubeletconfig_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource(in *kubeletconfig.SerializedNodeConfigSource, out *SerializedNodeConfigSource, s conversion.Scope) error {
return autoConvert_kubeletconfig_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource(in, out, s)
}

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -193,6 +193,13 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
}
}
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
if in.QOSReserved != nil {
in, out := &in.QOSReserved, &out.QOSReserved
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
if in.PodPidsLimit != nil {
in, out := &in.PodPidsLimit, &out.PodPidsLimit
@ -419,3 +426,29 @@ func (in *KubeletX509Authentication) DeepCopy() *KubeletX509Authentication {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SerializedNodeConfigSource) DeepCopyInto(out *SerializedNodeConfigSource) {
*out = *in
out.TypeMeta = in.TypeMeta
in.Source.DeepCopyInto(&out.Source)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SerializedNodeConfigSource.
func (in *SerializedNodeConfigSource) DeepCopy() *SerializedNodeConfigSource {
if in == nil {
return nil
}
out := new(SerializedNodeConfigSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *SerializedNodeConfigSource) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -8,13 +8,52 @@ load(
go_library(
name = "go_default_library",
srcs = ["validation.go"],
srcs = [
"validation.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"validation_others.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"validation_windows.go",
],
"//conditions:default": [],
}),
importpath = "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/validation",
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/validation:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@ -21,6 +21,8 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilvalidation "k8s.io/apimachinery/pkg/util/validation"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@ -29,6 +31,11 @@ import (
func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error {
allErrors := []error{}
// Make a local copy of the global feature gates and combine it with the gates set by this configuration.
// This allows us to validate the config against the set of gates it will actually run against.
localFeatureGate := utilfeature.DefaultFeatureGate.DeepCopy()
localFeatureGate.SetFromMap(kc.FeatureGates)
if !kc.CgroupsPerQOS && len(kc.EnforceNodeAllocatable) > 0 {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: EnforceNodeAllocatable (--enforce-node-allocatable) is not supported unless CgroupsPerQOS (--cgroups-per-qos) feature is turned on"))
}
@ -86,6 +93,12 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
if kc.RegistryPullQPS < 0 {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: RegistryPullQPS (--registry-qps) %v must not be a negative number", kc.RegistryPullQPS))
}
if kc.RotateCertificates && !localFeatureGate.Enabled(features.RotateKubeletClientCertificate) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: RotateCertificates %v requires feature gate RotateKubeletClientCertificate", kc.RotateCertificates))
}
if kc.ServerTLSBootstrap && !localFeatureGate.Enabled(features.RotateKubeletServerCertificate) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ServerTLSBootstrap %v requires feature gate RotateKubeletServerCertificate", kc.ServerTLSBootstrap))
}
for _, val := range kc.EnforceNodeAllocatable {
switch val {
case kubetypes.NodeAllocatableEnforcementKey:
@ -108,5 +121,9 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
allErrors = append(allErrors, fmt.Errorf("invalid configuration: option %q specified for HairpinMode (--hairpin-mode). Valid options are %q, %q or %q",
kc.HairpinMode, kubeletconfig.HairpinNone, kubeletconfig.HairpinVeth, kubeletconfig.PromiscuousBridge))
}
if err := validateKubeletOSConfiguration(kc); err != nil {
allErrors = append(allErrors, err)
}
return utilerrors.NewAggregate(allErrors)
}

View File

@ -1,7 +1,7 @@
// +build linux
// +build !windows
/*
Copyright 2015 The Kubernetes Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -16,15 +16,13 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package container
package validation
import (
"os"
"os/exec"
"github.com/kr/pty"
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
)
func StartPty(c *exec.Cmd) (*os.File, error) {
return pty.Start(c)
// validateKubeletOSConfiguration validates OS-specific kubelet configuration and returns an error if it is invalid.
func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) error {
return nil
}

View File

@ -0,0 +1,42 @@
// +build windows
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"fmt"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
)
// validateKubeletOSConfiguration validates OS-specific kubelet configuration and returns an error if it is invalid.
func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) error {
message := "invalid configuration: %v (%v) %v is not supported on Windows"
allErrors := []error{}
if kc.CgroupsPerQOS {
allErrors = append(allErrors, fmt.Errorf(message, "CgroupsPerQOS", "--cgroups-per-qos", kc.CgroupsPerQOS))
}
if len(kc.EnforceNodeAllocatable) > 0 {
allErrors = append(allErrors, fmt.Errorf(message, "EnforceNodeAllocatable", "--enforce-node-allocatable", kc.EnforceNodeAllocatable))
}
return utilerrors.NewAggregate(allErrors)
}

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -112,6 +112,13 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
out.ImageMinimumGCAge = in.ImageMinimumGCAge
out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
if in.QOSReserved != nil {
in, out := &in.QOSReserved, &out.QOSReserved
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
if in.EvictionHard != nil {
in, out := &in.EvictionHard, &out.EvictionHard
@ -239,3 +246,29 @@ func (in *KubeletX509Authentication) DeepCopy() *KubeletX509Authentication {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SerializedNodeConfigSource) DeepCopyInto(out *SerializedNodeConfigSource) {
*out = *in
out.TypeMeta = in.TypeMeta
in.Source.DeepCopyInto(&out.Source)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SerializedNodeConfigSource.
func (in *SerializedNodeConfigSource) DeepCopy() *SerializedNodeConfigSource {
if in == nil {
return nil
}
out := new(SerializedNodeConfigSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *SerializedNodeConfigSource) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}

View File

@ -0,0 +1,40 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"api.pb.go",
"constants.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/apis/pluginregistration/v1alpha1",
deps = [
"//vendor/github.com/gogo/protobuf/gogoproto:go_default_library",
"//vendor/github.com/gogo/protobuf/proto:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
filegroup(
name = "go_default_library_protos",
srcs = ["api.proto"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,60 @@
// To regenerate api.pb.go run hack/update-generated-kubelet-plugin-registration.sh
syntax = 'proto3';
package pluginregistration;
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
option (gogoproto.goproto_getters_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_unrecognized_all) = false;
// PluginInfo is the message sent from a plugin to the Kubelet pluginwatcher for plugin registration
message PluginInfo {
// Type of the Plugin. CSIPlugin or DevicePlugin
string type = 1;
// Plugin name that uniquely identifies the plugin for the given plugin type.
// For DevicePlugin, this is the resource name that the plugin manages and
// should follow the extended resource name convention.
// For CSI, this is the CSI driver registrar name.
string name = 2;
// Optional endpoint location. If set, the Kubelet component
// will use this endpoint for specific requests.
// This allows the plugin to register using one endpoint and possibly use
// a different socket for control operations. CSI uses this model to delegate
// its registration externally from the plugin.
string endpoint = 3;
// Plugin service API versions the plugin supports.
// For DevicePlugin, this maps to the deviceplugin API versions the
// plugin supports at the given socket.
// The Kubelet component communicating with the plugin should be able
// to choose any preferred version from this list, or return an error
// if none of the listed versions is supported.
repeated string supported_versions = 4;
}
// RegistrationStatus is the message sent from Kubelet pluginwatcher to the plugin for notification on registration status
message RegistrationStatus {
// True if the plugin was registered successfully with the Kubelet
bool plugin_registered = 1;
// Error message in case plugin fails to register, empty string otherwise
string error = 2;
}
// RegistrationStatusResponse is sent by the plugin to the Kubelet in response to the RegistrationStatus RPC
message RegistrationStatusResponse {
}
// InfoRequest is the empty request message from Kubelet
message InfoRequest {
}
// Registration is the service advertised by the Plugins.
service Registration {
rpc GetInfo(InfoRequest) returns (PluginInfo) {}
rpc NotifyRegistrationStatus(RegistrationStatus) returns (RegistrationStatusResponse) {}
}
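To make the registration flow above concrete, here is a minimal sketch (not part of this commit) of a plugin-side server for the Registration service, assuming the generated Go bindings in pkg/kubelet/apis/pluginregistration/v1alpha1 expose RegisterRegistrationServer and the PluginInfo/RegistrationStatus types; the socket path, driver name, and endpoint are hypothetical.
package main

import (
	"log"
	"net"

	"golang.org/x/net/context"
	"google.golang.org/grpc"

	registerapi "k8s.io/kubernetes/pkg/kubelet/apis/pluginregistration/v1alpha1"
)

// exampleRegistrar answers the Kubelet pluginwatcher's registration RPCs.
type exampleRegistrar struct{}

// GetInfo tells the pluginwatcher what kind of plugin this is and where its
// control socket lives.
func (e *exampleRegistrar) GetInfo(ctx context.Context, req *registerapi.InfoRequest) (*registerapi.PluginInfo, error) {
	return &registerapi.PluginInfo{
		Type:              registerapi.CSIPlugin,
		Name:              "example.csi.driver",                        // hypothetical driver name
		Endpoint:          "/var/lib/kubelet/plugins/example/csi.sock", // hypothetical control socket
		SupportedVersions: []string{"0.3.0"},
	}, nil
}

// NotifyRegistrationStatus receives the Kubelet's verdict on the registration.
func (e *exampleRegistrar) NotifyRegistrationStatus(ctx context.Context, status *registerapi.RegistrationStatus) (*registerapi.RegistrationStatusResponse, error) {
	if !status.PluginRegistered {
		log.Printf("registration failed: %s", status.Error)
	}
	return &registerapi.RegistrationStatusResponse{}, nil
}

func main() {
	// The pluginwatcher discovers registration sockets under the kubelet
	// plugin directory; the path here is illustrative.
	lis, err := net.Listen("unix", "/var/lib/kubelet/plugins/example-registrar.sock")
	if err != nil {
		log.Fatal(err)
	}
	server := grpc.NewServer()
	registerapi.RegisterRegistrationServer(server, &exampleRegistrar{})
	log.Fatal(server.Serve(lis))
}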

View File

@ -1,7 +1,5 @@
// +build !linux
/*
Copyright 2015 The Kubernetes Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -16,13 +14,9 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package container
package pluginregistration
import (
"os"
"os/exec"
const (
CSIPlugin = "CSIPlugin"
DevicePlugin = "DevicePlugin"
)
func StartPty(c *exec.Cmd) (pty *os.File, err error) {
return nil, nil
}

View File

@ -83,7 +83,7 @@ type RuntimeStats struct {
const (
// SystemContainerKubelet is the container name for the system container tracking Kubelet usage.
SystemContainerKubelet = "kubelet"
// SystemContainerRuntime is the container name for the system container tracking the runtime (e.g. docker or rkt) usage.
// SystemContainerRuntime is the container name for the system container tracking the runtime (e.g. docker) usage.
SystemContainerRuntime = "runtime"
// SystemContainerMisc is the container name for the system container tracking non-kubernetes processes.
SystemContainerMisc = "misc"

View File

@ -19,7 +19,7 @@ package apis
const (
// When kubelet is started with the "external" cloud provider, then
// it sets this annotation on the node to denote an ip address set from the
// cmd line flag. This ip is verified with the cloudprovider as valid by
// cmd line flag (--node-ip). This ip is verified with the cloudprovider as valid by
// the cloud-controller-manager
AnnotationProvidedIPAddr = "alpha.kubernetes.io/provided-node-ip"
)

View File

@ -109,9 +109,10 @@ func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, roo
sysFs := sysfs.NewRealSysFs()
ignoreMetrics := cadvisormetrics.MetricSet{
cadvisormetrics.NetworkTcpUsageMetrics: struct{}{},
cadvisormetrics.NetworkUdpUsageMetrics: struct{}{},
cadvisormetrics.PerCpuUsageMetrics: struct{}{},
cadvisormetrics.NetworkTcpUsageMetrics: struct{}{},
cadvisormetrics.NetworkUdpUsageMetrics: struct{}{},
cadvisormetrics.PerCpuUsageMetrics: struct{}{},
cadvisormetrics.ProcessSchedulerMetrics: struct{}{},
}
if !usingLegacyStats {
ignoreMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}

View File

@ -26,6 +26,7 @@ import (
)
type cadvisorClient struct {
rootPath string
winStatsClient winstats.Client
}
@ -34,7 +35,10 @@ var _ Interface = new(cadvisorClient)
// New creates a cAdvisor and exports its API on the specified port if port > 0.
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
client, err := winstats.NewPerfCounterClient()
return &cadvisorClient{winStatsClient: client}, err
return &cadvisorClient{
rootPath: rootPath,
winStatsClient: client,
}, err
}
func (cu *cadvisorClient) Start() error {
@ -70,7 +74,7 @@ func (cu *cadvisorClient) ImagesFsInfo() (cadvisorapiv2.FsInfo, error) {
}
func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
return cu.GetDirFsInfo(cu.rootPath)
}
func (cu *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {

View File

@ -38,8 +38,6 @@ func (i *imageFsInfoProvider) ImageFsInfoLabel() (string, error) {
switch i.runtime {
case types.DockerContainerRuntime:
return cadvisorfs.LabelDockerImages, nil
case types.RktContainerRuntime:
return cadvisorfs.LabelRktImages, nil
case types.RemoteContainerRuntime:
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.

View File

@ -75,7 +75,6 @@ func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceLis
// https://github.com/kubernetes/kubernetes/issues/51798
// UsingLegacyCadvisorStats returns true if container stats are provided by cadvisor instead of through the CRI
func UsingLegacyCadvisorStats(runtime, runtimeEndpoint string) bool {
return runtime == kubetypes.RktContainerRuntime ||
(runtime == kubetypes.DockerContainerRuntime && goruntime.GOOS == "linux") ||
return (runtime == kubetypes.DockerContainerRuntime && goruntime.GOOS == "linux") ||
runtimeEndpoint == CrioSocket
}

View File

@ -26,6 +26,7 @@ go_library(
"//vendor/k8s.io/client-go/kubernetes/typed/certificates/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/util/certificate:go_default_library",
"//vendor/k8s.io/client-go/util/connrotation:go_default_library",
],
)

View File

@ -1,6 +1,7 @@
reviewers:
- mikedanese
- liggitt
- awly
approvers:
- mikedanese
- liggitt

View File

@ -30,6 +30,7 @@ go_library(
"//vendor/k8s.io/client-go/tools/clientcmd/api:go_default_library",
"//vendor/k8s.io/client-go/transport:go_default_library",
"//vendor/k8s.io/client-go/util/cert:go_default_library",
"//vendor/k8s.io/client-go/util/certificate:go_default_library",
"//vendor/k8s.io/client-go/util/certificate/csr:go_default_library",
],
)

View File

@ -32,13 +32,11 @@ import (
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
"k8s.io/client-go/transport"
certutil "k8s.io/client-go/util/cert"
"k8s.io/client-go/util/certificate"
"k8s.io/client-go/util/certificate/csr"
)
const (
defaultKubeletClientCertificateFile = "kubelet-client.crt"
defaultKubeletClientKeyFile = "kubelet-client.key"
)
const tmpPrivateKeyFile = "kubelet-client.key.tmp"
// LoadClientCert requests a client cert for kubelet if the kubeconfigPath file does not exist.
// The kubeconfig at bootstrapPath is used to request a client certificate from the API server.
@ -66,48 +64,46 @@ func LoadClientCert(kubeconfigPath string, bootstrapPath string, certDir string,
return fmt.Errorf("unable to create certificates signing request client: %v", err)
}
success := false
// Get the private key.
keyPath, err := filepath.Abs(filepath.Join(certDir, defaultKubeletClientKeyFile))
store, err := certificate.NewFileStore("kubelet-client", certDir, certDir, "", "")
if err != nil {
return fmt.Errorf("unable to build bootstrap key path: %v", err)
}
// If we are unable to generate a CSR, we remove our key file and start fresh.
// This method is used before enabling client rotation and so we must ensure we
// can make forward progress if we crash and exit when a CSR exists but the cert
// it is signed for has expired.
defer func() {
if !success {
if err := os.Remove(keyPath); err != nil && !os.IsNotExist(err) {
glog.Warningf("Cannot clean up the key file %q: %v", keyPath, err)
}
}
}()
keyData, _, err := certutil.LoadOrGenerateKeyFile(keyPath)
if err != nil {
return err
return fmt.Errorf("unable to build bootstrap cert store")
}
// Get the cert.
certPath, err := filepath.Abs(filepath.Join(certDir, defaultKubeletClientCertificateFile))
if err != nil {
return fmt.Errorf("unable to build bootstrap client cert path: %v", err)
}
defer func() {
if !success {
if err := os.Remove(certPath); err != nil && !os.IsNotExist(err) {
glog.Warningf("Cannot clean up the cert file %q: %v", certPath, err)
var keyData []byte
if cert, err := store.Current(); err == nil {
if cert.PrivateKey != nil {
keyData, err = certutil.MarshalPrivateKeyToPEM(cert.PrivateKey)
if err != nil {
keyData = nil
}
}
}()
}
// Cache the private key in a separate file until CSR succeeds. This has to
// be a separate file because store.CurrentPath() points to a symlink
// managed by the store.
privKeyPath := filepath.Join(certDir, tmpPrivateKeyFile)
if !verifyKeyData(keyData) {
glog.V(2).Infof("No valid private key and/or certificate found, reusing existing private key or creating a new one")
// Note: always call LoadOrGenerateKeyFile so that private key is
// reused on next startup if CSR request fails.
keyData, _, err = certutil.LoadOrGenerateKeyFile(privKeyPath)
if err != nil {
return err
}
}
certData, err := csr.RequestNodeCertificate(bootstrapClient.CertificateSigningRequests(), keyData, nodeName)
if err != nil {
return err
}
if err := certutil.WriteCert(certPath, certData); err != nil {
if _, err := store.Update(certData, keyData); err != nil {
return err
}
if err := os.Remove(privKeyPath); err != nil && !os.IsNotExist(err) {
glog.V(2).Infof("failed cleaning up private key file %q: %v", privKeyPath, err)
}
pemPath := store.CurrentPath()
// Get the CA data from the bootstrap client config.
caFile, caData := bootstrapClientConfig.CAFile, []byte{}
@ -126,8 +122,8 @@ func LoadClientCert(kubeconfigPath string, bootstrapPath string, certDir string,
}},
// Define auth based on the obtained client cert.
AuthInfos: map[string]*clientcmdapi.AuthInfo{"default-auth": {
ClientCertificate: certPath,
ClientKey: keyPath,
ClientCertificate: pemPath,
ClientKey: pemPath,
}},
// Define a context that connects the auth info and cluster, and set it as the default
Contexts: map[string]*clientcmdapi.Context{"default-context": {
@ -139,12 +135,7 @@ func LoadClientCert(kubeconfigPath string, bootstrapPath string, certDir string,
}
// Marshal to disk
if err := clientcmd.WriteToFile(kubeconfigData, kubeconfigPath); err != nil {
return err
}
success = true
return nil
return clientcmd.WriteToFile(kubeconfigData, kubeconfigPath)
}
func loadRESTClientConfig(kubeconfig string) (*restclient.Config, error) {
@ -207,3 +198,12 @@ func verifyBootstrapClientConfig(kubeconfigPath string) (bool, error) {
}
return true, nil
}
// verifyKeyData returns true if the provided data appears to be a valid private key.
func verifyKeyData(data []byte) bool {
if len(data) == 0 {
return false
}
_, err := certutil.ParsePrivateKeyPEM(data)
return err == nil
}
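A minimal sketch (not part of this commit) of invoking the bootstrap flow above; all paths and the node name are illustrative:
package main

import (
	"log"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/kubelet/certificate/bootstrap"
)

func main() {
	// If the kubeconfig already exists and is valid, this is a no-op;
	// otherwise a CSR is submitted using the bootstrap credentials and the
	// signed certificate is written into the cert directory's file store.
	err := bootstrap.LoadClientCert(
		"/var/lib/kubelet/kubeconfig",            // kubeconfig to write
		"/etc/kubernetes/bootstrap-kubelet.conf", // bootstrap credentials
		"/var/lib/kubelet/pki",                   // certDir backing the file store
		types.NodeName("node-1"),                 // node requesting the certificate
	)
	if err != nil {
		log.Fatal(err)
	}
}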

View File

@ -17,12 +17,10 @@ limitations under the License.
package certificate
import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"sync"
"time"
"github.com/golang/glog"
@ -31,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/util/certificate"
"k8s.io/client-go/util/connrotation"
)
// UpdateTransport instruments a restconfig with a transport that dynamically uses
@ -38,6 +37,8 @@ import (
//
// The config must not already provide an explicit transport.
//
// The returned function allows forcefully closing all active connections.
//
// The returned transport periodically checks the manager to determine if the
// certificate has changed. If it has, the transport shuts down all existing client
// connections, forcing the client to re-handshake with the server and use the
@ -51,87 +52,87 @@ import (
//
// stopCh should be used to indicate when the transport is unused and doesn't need
// to continue checking the manager.
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) (func(), error) {
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitAfter)
}
// updateTransport is an internal method that exposes how often this method checks that the
// client cert has changed.
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
if clientConfig.Transport != nil {
return fmt.Errorf("there is already a transport configured")
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) (func(), error) {
if clientConfig.Transport != nil || clientConfig.Dial != nil {
return nil, fmt.Errorf("there is already a transport or dialer configured")
}
d := connrotation.NewDialer((&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext)
tlsConfig, err := restclient.TLSConfigFor(clientConfig)
if err != nil {
return fmt.Errorf("unable to configure TLS for the rest client: %v", err)
return nil, fmt.Errorf("unable to configure TLS for the rest client: %v", err)
}
if tlsConfig == nil {
tlsConfig = &tls.Config{}
}
tlsConfig.Certificates = nil
tlsConfig.GetClientCertificate = func(requestInfo *tls.CertificateRequestInfo) (*tls.Certificate, error) {
cert := clientCertificateManager.Current()
if cert == nil {
return &tls.Certificate{Certificate: nil}, nil
}
return cert, nil
}
// Custom dialer that will track all connections it creates.
t := &connTracker{
dialer: &net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second},
conns: make(map[*closableConn]struct{}),
}
lastCertAvailable := time.Now()
lastCert := clientCertificateManager.Current()
go wait.Until(func() {
curr := clientCertificateManager.Current()
if exitAfter > 0 {
now := time.Now()
if curr == nil {
// the certificate has been deleted from disk or is otherwise corrupt
if now.After(lastCertAvailable.Add(exitAfter)) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
} else {
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
}
}
} else {
// the certificate is expired
if now.After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
}
}
lastCertAvailable = now
if clientCertificateManager != nil {
tlsConfig.Certificates = nil
tlsConfig.GetClientCertificate = func(requestInfo *tls.CertificateRequestInfo) (*tls.Certificate, error) {
cert := clientCertificateManager.Current()
if cert == nil {
return &tls.Certificate{Certificate: nil}, nil
}
return cert, nil
}
if curr == nil || lastCert == curr {
// Cert hasn't been rotated.
return
}
lastCert = curr
lastCertAvailable := time.Now()
lastCert := clientCertificateManager.Current()
go wait.Until(func() {
curr := clientCertificateManager.Current()
glog.Infof("certificate rotation detected, shutting down client connections to start using new credentials")
// The cert has been rotated. Close all existing connections to force the client
// to reperform its TLS handshake with new cert.
//
// See: https://github.com/kubernetes-incubator/bootkube/pull/663#issuecomment-318506493
t.closeAllConns()
}, period, stopCh)
if exitAfter > 0 {
now := time.Now()
if curr == nil {
// the certificate has been deleted from disk or is otherwise corrupt
if now.After(lastCertAvailable.Add(exitAfter)) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
} else {
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
}
}
} else {
// the certificate is expired
if now.After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
}
}
lastCertAvailable = now
}
}
if curr == nil || lastCert == curr {
// Cert hasn't been rotated.
return
}
lastCert = curr
glog.Infof("certificate rotation detected, shutting down client connections to start using new credentials")
// The cert has been rotated. Close all existing connections to force the client
// to reperform its TLS handshake with new cert.
//
// See: https://github.com/kubernetes-incubator/bootkube/pull/663#issuecomment-318506493
d.CloseAll()
}, period, stopCh)
}
clientConfig.Transport = utilnet.SetTransportDefaults(&http.Transport{
Proxy: http.ProxyFromEnvironment,
TLSHandshakeTimeout: 10 * time.Second,
TLSClientConfig: tlsConfig,
MaxIdleConnsPerHost: 25,
DialContext: t.DialContext, // Use custom dialer.
DialContext: d.DialContext, // Use custom dialer.
})
// Zero out all existing TLS options since our new transport enforces them.
@ -142,60 +143,6 @@ func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig
clientConfig.CAData = nil
clientConfig.CAFile = ""
clientConfig.Insecure = false
return nil
}
// connTracker is a dialer that tracks all open connections it creates.
type connTracker struct {
dialer *net.Dialer
mu sync.Mutex
conns map[*closableConn]struct{}
}
// closeAllConns forcibly closes all tracked connections.
func (c *connTracker) closeAllConns() {
c.mu.Lock()
conns := c.conns
c.conns = make(map[*closableConn]struct{})
c.mu.Unlock()
for conn := range conns {
conn.Close()
}
}
func (c *connTracker) DialContext(ctx context.Context, network, address string) (net.Conn, error) {
conn, err := c.dialer.DialContext(ctx, network, address)
if err != nil {
return nil, err
}
closable := &closableConn{Conn: conn}
// Start tracking the connection
c.mu.Lock()
c.conns[closable] = struct{}{}
c.mu.Unlock()
// When the connection is closed, remove it from the map. This will
// be no-op if the connection isn't in the map, e.g. if closeAllConns()
// is called.
closable.onClose = func() {
c.mu.Lock()
delete(c.conns, closable)
c.mu.Unlock()
}
return closable, nil
}
type closableConn struct {
onClose func()
net.Conn
}
func (c *closableConn) Close() error {
go c.onClose()
return c.Conn.Close()
return d.CloseAll, nil
}
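A minimal sketch (not part of this commit) of consuming the new UpdateTransport signature; the exit-after policy and names are illustrative:
package example

import (
	"time"

	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/util/certificate"

	kubeletcertificate "k8s.io/kubernetes/pkg/kubelet/certificate"
)

// instrumentConfig wires rotation-aware dialing into cfg and returns a
// function that forcefully closes all active client connections.
func instrumentConfig(stop <-chan struct{}, cfg *restclient.Config, mgr certificate.Manager) (func(), error) {
	// Exit the process if no valid client cert has been available for five
	// minutes (hypothetical policy; pass 0 to disable the check).
	return kubeletcertificate.UpdateTransport(stop, cfg, mgr, 5*time.Minute)
}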

View File

@ -187,7 +187,7 @@ func TestRotateShutsDownConnections(t *testing.T) {
}
// Check for a new cert every 10 milliseconds
if err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
if _, err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
t.Fatal(err)
}

View File

@ -7,9 +7,8 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/github.com/dchest/safefile:go_default_library",
"//vendor/github.com/ghodss/yaml:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
],
@ -21,6 +20,7 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],

View File

@ -17,20 +17,15 @@ limitations under the License.
package checkpoint
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"sync"
"github.com/dchest/safefile"
"github.com/ghodss/yaml"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/volume/util"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
const (
@ -39,54 +34,44 @@ const (
podPrefix = "Pod"
)
// Manager is the interface used to manage checkpoints
// which involves writing resources to disk to recover
// during restart or failure scenarios.
// https://github.com/kubernetes/community/pull/1241/files
type Manager interface {
// LoadPods will load checkpointed Pods from disk
LoadPods() ([]*v1.Pod, error)
// WritePod will serialize a Pod to disk
WritePod(pod *v1.Pod) error
// Deletes the checkpoint of the given pod from disk
DeletePod(pod *v1.Pod) error
type PodCheckpoint interface {
checkpointmanager.Checkpoint
GetPod() *v1.Pod
}
var instance Manager
var mutex = &sync.Mutex{}
// fileCheckPointManager - is a checkpointer that writes contents to disk
// The type information of the resource objects are encoded in the name
type fileCheckPointManager struct {
path string
// Data to be stored as checkpoint
type Data struct {
Pod *v1.Pod
Checksum checksum.Checksum
}
// NewCheckpointManager will create a Manager that points to the following path
func NewCheckpointManager(path string) Manager {
// NOTE: This is a precaution; current implementation should not run
// multiple checkpoint managers.
mutex.Lock()
defer mutex.Unlock()
instance = &fileCheckPointManager{path: path}
return instance
// NewPodCheckpoint returns a new pod checkpoint
func NewPodCheckpoint(pod *v1.Pod) PodCheckpoint {
return &Data{Pod: pod}
}
// GetInstance will return the current Manager, there should be only one.
func GetInstance() Manager {
mutex.Lock()
defer mutex.Unlock()
return instance
// MarshalCheckpoint returns marshalled data
func (cp *Data) MarshalCheckpoint() ([]byte, error) {
cp.Checksum = checksum.New(*cp.Pod)
return json.Marshal(*cp)
}
// loadPod will load Pod Checkpoint yaml file.
func (fcp *fileCheckPointManager) loadPod(file string) (*v1.Pod, error) {
return util.LoadPodFromFile(file)
// UnmarshalCheckpoint unmarshals checkpoint data into the receiver
func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}
// VerifyChecksum verifies that the stored checksum matches the checksum calculated from the pod
func (cp *Data) VerifyChecksum() error {
return cp.Checksum.Verify(*cp.Pod)
}
// GetPod retrieves the pod from the checkpoint
func (cp *Data) GetPod() *v1.Pod {
return cp.Pod
}
// checkAnnotations checks whether the bootstrap checkpoint annotation is set on the Pod
func (fcp *fileCheckPointManager) checkAnnotations(pod *v1.Pod) bool {
func checkAnnotations(pod *v1.Pod) bool {
if podAnnotations := pod.GetAnnotations(); podAnnotations != nil {
if podAnnotations[core.BootstrapCheckpointAnnotationKey] == "true" {
return true
@ -95,57 +80,49 @@ func (fcp *fileCheckPointManager) checkAnnotations(pod *v1.Pod) bool {
return false
}
// getPodPath returns the full qualified path for the pod checkpoint
func (fcp *fileCheckPointManager) getPodPath(pod *v1.Pod) string {
return fmt.Sprintf("%v/Pod%v%v.yaml", fcp.path, delimiter, pod.GetUID())
// getPodKey returns the fully qualified key for the pod checkpoint
func getPodKey(pod *v1.Pod) string {
return fmt.Sprintf("Pod%v%v.yaml", delimiter, pod.GetUID())
}
// LoadPods loads all checkpointed pods from disk
func (fcp *fileCheckPointManager) LoadPods() ([]*v1.Pod, error) {
checkpoints := make([]*v1.Pod, 0)
files, err := ioutil.ReadDir(fcp.path)
func LoadPods(cpm checkpointmanager.CheckpointManager) ([]*v1.Pod, error) {
pods := make([]*v1.Pod, 0)
var err error
checkpointKeys := []string{}
checkpointKeys, err = cpm.ListCheckpoints()
if err != nil {
return nil, err
glog.Errorf("Failed to list checkpoints: %v", err)
}
for _, f := range files {
// get just the filename
_, fname := filepath.Split(f.Name())
// Get just the Resource from "Resource_Name"
fnfields := strings.Split(fname, delimiter)
switch fnfields[0] {
case podPrefix:
pod, err := fcp.loadPod(fmt.Sprintf("%s/%s", fcp.path, f.Name()))
if err != nil {
return nil, err
}
checkpoints = append(checkpoints, pod)
default:
glog.Warningf("Unsupported checkpoint file detected %v", f)
for _, key := range checkpointKeys {
checkpoint := NewPodCheckpoint(nil)
err := cpm.GetCheckpoint(key, checkpoint)
if err != nil {
glog.Errorf("Failed to retrieve checkpoint for pod %q: %v", key, err)
continue
}
pods = append(pods, checkpoint.GetPod())
}
return checkpoints, nil
return pods, nil
}
// Writes a checkpoint to a file on disk if annotation is present
func (fcp *fileCheckPointManager) WritePod(pod *v1.Pod) error {
// WritePod writes a checkpoint to a file on disk if the annotation is present
func WritePod(cpm checkpointmanager.CheckpointManager, pod *v1.Pod) error {
var err error
if fcp.checkAnnotations(pod) {
if blob, err := yaml.Marshal(pod); err == nil {
err = safefile.WriteFile(fcp.getPodPath(pod), blob, 0644)
}
if checkAnnotations(pod) {
data := NewPodCheckpoint(pod)
err = cpm.CreateCheckpoint(getPodKey(pod), data)
} else {
// This is to handle an edge where a pod update could remove
// an annotation and the checkpoint should then be removed.
err = fcp.DeletePod(pod)
err = cpm.RemoveCheckpoint(getPodKey(pod))
}
return err
}
// Deletes a checkpoint from disk if present
func (fcp *fileCheckPointManager) DeletePod(pod *v1.Pod) error {
podPath := fcp.getPodPath(pod)
if err := os.Remove(podPath); !os.IsNotExist(err) {
return err
}
return nil
// DeletePod deletes a checkpoint from disk if present
func DeletePod(cpm checkpointmanager.CheckpointManager, pod *v1.Pod) error {
return cpm.RemoveCheckpoint(getPodKey(pod))
}
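A minimal sketch (not part of this commit) of the write/load cycle these helpers provide; the checkpoint directory is hypothetical:
package example

import (
	"k8s.io/api/core/v1"

	"k8s.io/kubernetes/pkg/kubelet/checkpoint"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
)

// persistBootstrapPods writes each pod's checkpoint (or removes a stale one
// when the annotation is absent) and then loads everything back from disk.
func persistBootstrapPods(pods []*v1.Pod) ([]*v1.Pod, error) {
	cpm, err := checkpointmanager.NewCheckpointManager("/var/lib/kubelet/checkpoints")
	if err != nil {
		return nil, err
	}
	for _, p := range pods {
		if err := checkpoint.WritePod(cpm, p); err != nil {
			return nil, err
		}
	}
	return checkpoint.LoadPods(cpm)
}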

View File

@ -25,6 +25,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
)
// TestWriteLoadDeletePods validates all combinations of write, load, and delete
@ -70,15 +71,18 @@ func TestWriteLoadDeletePods(t *testing.T) {
}
defer os.RemoveAll(dir)
cp := NewCheckpointManager(dir)
cpm, err := checkpointmanager.NewCheckpointManager(dir)
if err != nil {
t.Errorf("Failed to initialize checkpoint manager error=%v", err)
}
for _, p := range testPods {
// Write pods should always pass unless there is an fs error
if err := cp.WritePod(p.pod); err != nil {
if err := WritePod(cpm, p.pod); err != nil {
t.Errorf("Failed to Write Pod: %v", err)
}
}
// verify the correct written files are loaded from disk
pods, err := cp.LoadPods()
pods, err := LoadPods(cpm)
if err != nil {
t.Errorf("Failed to Load Pods: %v", err)
}
@ -104,7 +108,7 @@ func TestWriteLoadDeletePods(t *testing.T) {
} else if lpod != nil {
t.Errorf("Got unexpected result for %v, should not have been loaded", pname)
}
err = cp.DeletePod(p.pod)
err = DeletePod(cpm, p.pod)
if err != nil {
t.Errorf("Failed to delete pod %v", pname)
}

View File

@ -0,0 +1,48 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = ["checkpoint_manager.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager",
deps = [
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
"//pkg/kubelet/util/store:go_default_library",
"//pkg/util/filesystem:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["checkpoint_manager_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
"//pkg/kubelet/checkpointmanager/testing:go_default_library",
"//pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/kubelet/checkpointmanager/checksum:all-srcs",
"//pkg/kubelet/checkpointmanager/errors:all-srcs",
"//pkg/kubelet/checkpointmanager/testing:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,25 @@
## DISCLAIMER
- The SIG Node community has reached a general consensus that, as a best
practice, introducing any new checkpointing support should be avoided. We
reached this understanding after struggling with hard-to-debug issues in
production environments caused by checkpointing.
- Any change to a checkpointed data structure is considered incompatible; a
component should add its own handling if it needs to ensure backward
compatibility when reading old-format checkpoint files.
## Introduction
This folder contains a framework & primitives, Checkpointing Manager, which is
used by several other Kubelet submodules, `dockershim`, `devicemanager`, `pods`
and `cpumanager`, to implement checkpointing at each submodule level. As already
explained in above `Disclaimer` section, think twice before introducing any further
checkpointing in Kubelet. If still checkpointing is required, then this folder
provides the common APIs and the framework for implementing checkpointing.
Using same APIs across all the submodules will help maintaining consistency at
Kubelet level.
Below is the history of checkpointing support in Kubelet.
| Package | First checkpointing support merged on | PR link |
| ------- | ------------------------------------- | ------- |
| kubelet/dockershim | Feb 3, 2017 | [[CRI] Implement Dockershim Checkpoint](https://github.com/kubernetes/kubernetes/pull/39903) |
| devicemanager | Sep 6, 2017 | [Deviceplugin checkpoint](https://github.com/kubernetes/kubernetes/pull/51744) |
| kubelet/pod | Nov 22, 2017 | [Initial basic bootstrap-checkpoint support](https://github.com/kubernetes/kubernetes/pull/50984) |
| cpumanager | Oct 27, 2017 | [Add file backed state to cpu manager](https://github.com/kubernetes/kubernetes/pull/54408) |
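As a minimal sketch of the APIs this framework provides (names below are illustrative, not part of the framework), a submodule defines a `Checkpoint` implementation and drives it through a `CheckpointManager`:

```go
package example

import (
	"encoding/json"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)

// stateCheckpoint is a hypothetical checkpoint payload with an embedded checksum.
type stateCheckpoint struct {
	Entries  map[string]string
	Checksum checksum.Checksum
}

func (sc *stateCheckpoint) MarshalCheckpoint() ([]byte, error) {
	sc.Checksum = checksum.New(sc.Entries) // stamp the checksum before serializing
	return json.Marshal(*sc)
}

func (sc *stateCheckpoint) UnmarshalCheckpoint(blob []byte) error {
	return json.Unmarshal(blob, sc)
}

func (sc *stateCheckpoint) VerifyChecksum() error {
	return sc.Checksum.Verify(sc.Entries)
}

func saveAndRestore(dir string) error {
	cpm, err := checkpointmanager.NewCheckpointManager(dir)
	if err != nil {
		return err
	}
	in := &stateCheckpoint{Entries: map[string]string{"pod-a": "running"}}
	if err := cpm.CreateCheckpoint("example_state", in); err != nil {
		return err
	}
	// GetCheckpoint unmarshals the blob and then verifies the checksum.
	out := &stateCheckpoint{}
	return cpm.GetCheckpoint("example_state", out)
}
```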

View File

@ -0,0 +1,110 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package checkpointmanager
import (
"fmt"
"sync"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
)
// Checkpoint provides the process checkpoint data
type Checkpoint interface {
MarshalCheckpoint() ([]byte, error)
UnmarshalCheckpoint(blob []byte) error
VerifyChecksum() error
}
// CheckpointManager provides the interface to manage checkpoint
type CheckpointManager interface {
// CreateCheckpoint persists checkpoint in CheckpointStore. checkpointKey is the key for utilstore to locate checkpoint.
// For file backed utilstore, checkpointKey is the file name to write the checkpoint data.
CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error
// GetCheckpoint retrieves checkpoint from CheckpointStore.
GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error
// WARNING: RemoveCheckpoint will not return an error if the checkpoint does not exist.
RemoveCheckpoint(checkpointKey string) error
// ListCheckpoints returns the list of existing checkpoints.
ListCheckpoints() ([]string, error)
}
// impl is an implementation of CheckpointManager. It persists checkpoint in CheckpointStore
type impl struct {
path string
store utilstore.Store
mutex sync.Mutex
}
// NewCheckpointManager returns a new instance of a checkpoint manager
func NewCheckpointManager(checkpointDir string) (CheckpointManager, error) {
fstore, err := utilstore.NewFileStore(checkpointDir, utilfs.DefaultFs{})
if err != nil {
return nil, err
}
return &impl{path: checkpointDir, store: fstore}, nil
}
// CreateCheckpoint persists checkpoint in CheckpointStore.
func (manager *impl) CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
manager.mutex.Lock()
defer manager.mutex.Unlock()
blob, err := checkpoint.MarshalCheckpoint()
if err != nil {
return err
}
return manager.store.Write(checkpointKey, blob)
}
// GetCheckpoint retrieves checkpoint from CheckpointStore.
func (manager *impl) GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
manager.mutex.Lock()
defer manager.mutex.Unlock()
blob, err := manager.store.Read(checkpointKey)
if err != nil {
if err == utilstore.ErrKeyNotFound {
return errors.ErrCheckpointNotFound
}
return err
}
err = checkpoint.UnmarshalCheckpoint(blob)
if err == nil {
err = checkpoint.VerifyChecksum()
}
return err
}
// RemoveCheckpoint will not return an error if the checkpoint does not exist.
func (manager *impl) RemoveCheckpoint(checkpointKey string) error {
manager.mutex.Lock()
defer manager.mutex.Unlock()
return manager.store.Delete(checkpointKey)
}
// ListCheckpoints returns the list of existing checkpoints.
func (manager *impl) ListCheckpoints() ([]string, error) {
manager.mutex.Lock()
defer manager.mutex.Unlock()
keys, err := manager.store.List()
if err != nil {
return []string{}, fmt.Errorf("failed to list checkpoint store: %v", err)
}
return keys, nil
}

View File

@ -0,0 +1,245 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package checkpointmanager
import (
"encoding/json"
"sort"
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
utilstore "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1"
)
var testStore *utilstore.MemStore
type FakeCheckpoint interface {
Checkpoint
GetData() ([]*PortMapping, bool)
}
// Data contains all types of data that can be stored in the checkpoint.
type Data struct {
PortMappings []*PortMapping `json:"port_mappings,omitempty"`
HostNetwork bool `json:"host_network,omitempty"`
}
type CheckpointDataV2 struct {
PortMappings []*PortMapping `json:"port_mappings,omitempty"`
HostNetwork bool `json:"host_network,omitempty"`
V2Field string `json:"v2field"`
}
type protocol string
// PortMapping is the port mapping configuration of a sandbox.
type PortMapping struct {
// protocol of the port mapping.
Protocol *protocol
// Port number within the container.
ContainerPort *int32
// Port number on the host.
HostPort *int32
}
// CheckpointData is a sample structure used in test cases for checkpointing
type CheckpointData struct {
Version string
Name string
Data *Data
Checksum checksum.Checksum
}
func newFakeCheckpointV1(name string, portMappings []*PortMapping, hostNetwork bool) FakeCheckpoint {
return &CheckpointData{
Version: "v1",
Name: name,
Data: &Data{
PortMappings: portMappings,
HostNetwork: hostNetwork,
},
}
}
func (cp *CheckpointData) MarshalCheckpoint() ([]byte, error) {
cp.Checksum = checksum.New(*cp.Data)
return json.Marshal(*cp)
}
func (cp *CheckpointData) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}
func (cp *CheckpointData) VerifyChecksum() error {
return cp.Checksum.Verify(*cp.Data)
}
func (cp *CheckpointData) GetData() ([]*PortMapping, bool) {
return cp.Data.PortMappings, cp.Data.HostNetwork
}
type checkpointDataV2 struct {
Version string
Name string
Data *CheckpointDataV2
Checksum checksum.Checksum
}
func newFakeCheckpointV2(name string, portMappings []*PortMapping, hostNetwork bool) FakeCheckpoint {
return &checkpointDataV2{
Version: "v2",
Name: name,
Data: &CheckpointDataV2{
PortMappings: portMappings,
HostNetwork: hostNetwork,
},
}
}
func newFakeCheckpointRemoteV1(name string, portMappings []*v1.PortMapping, hostNetwork bool) Checkpoint {
return &v1.CheckpointData{
Version: "v1",
Name: name,
Data: &v1.Data{
PortMappings: portMappings,
HostNetwork: hostNetwork,
},
}
}
func (cp *checkpointDataV2) MarshalCheckpoint() ([]byte, error) {
cp.Checksum = checksum.New(*cp.Data)
return json.Marshal(*cp)
}
func (cp *checkpointDataV2) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}
func (cp *checkpointDataV2) VerifyChecksum() error {
return cp.Checksum.Verify(*cp.Data)
}
func (cp *checkpointDataV2) GetData() ([]*PortMapping, bool) {
return cp.Data.PortMappings, cp.Data.HostNetwork
}
func newTestCheckpointManager() CheckpointManager {
return &impl{store: testStore}
}
func TestCheckpointManager(t *testing.T) {
var err error
testStore = utilstore.NewMemStore()
manager := newTestCheckpointManager()
port80 := int32(80)
port443 := int32(443)
proto := protocol("tcp")
portMappings := []*PortMapping{
{
&proto,
&port80,
&port80,
},
{
&proto,
&port443,
&port443,
},
}
checkpoint1 := newFakeCheckpointV1("check1", portMappings, true)
checkpoints := []struct {
checkpointKey string
checkpoint FakeCheckpoint
expectHostNetwork bool
}{
{
"key1",
checkpoint1,
true,
},
{
"key2",
newFakeCheckpointV1("check2", nil, false),
false,
},
}
for _, tc := range checkpoints {
// Test CreateCheckpoints
err = manager.CreateCheckpoint(tc.checkpointKey, tc.checkpoint)
assert.NoError(t, err)
// Test GetCheckpoints
checkpointOut := newFakeCheckpointV1("", nil, false)
err := manager.GetCheckpoint(tc.checkpointKey, checkpointOut)
assert.NoError(t, err)
actualPortMappings, actualHostNetwork := checkpointOut.GetData()
expPortMappings, expHostNetwork := tc.checkpoint.GetData()
assert.Equal(t, actualPortMappings, expPortMappings)
assert.Equal(t, actualHostNetwork, expHostNetwork)
}
// Test that reading a V1 structure into V2, a structure different from the checkpointed one, fails
checkpointV2 := newFakeCheckpointV2("", nil, false)
err = manager.GetCheckpoint("key1", checkpointV2)
assert.EqualError(t, err, "checkpoint is corrupted")
// Test that reading a V1 structure into the same structure defined in another package fails
checkpointRemoteV1 := newFakeCheckpointRemoteV1("", nil, false)
err = manager.GetCheckpoint("key1", checkpointRemoteV1)
assert.EqualError(t, err, "checkpoint is corrupted")
// Test that reading a V1 structure into a new V1 structure works
checkpointV1 := newFakeCheckpointV1("", nil, false)
err = manager.GetCheckpoint("key1", checkpointV1)
assert.NoError(t, err)
// Test corrupt checksum case
checkpointOut := newFakeCheckpointV1("", nil, false)
blob, err := checkpointOut.MarshalCheckpoint()
assert.NoError(t, err)
testStore.Write("key1", blob)
err = manager.GetCheckpoint("key1", checkpoint1)
assert.EqualError(t, err, "checkpoint is corrupted")
// Test ListCheckpoints
keys, err := manager.ListCheckpoints()
assert.NoError(t, err)
sort.Strings(keys)
assert.Equal(t, keys, []string{"key1", "key2"})
// Test RemoveCheckpoints
err = manager.RemoveCheckpoint("key1")
assert.NoError(t, err)
// Test removing a nonexistent checkpoint
err = manager.RemoveCheckpoint("key1")
assert.NoError(t, err)
// Test ListCheckpoints
keys, err = manager.ListCheckpoints()
assert.NoError(t, err)
assert.Equal(t, keys, []string{"key2"})
// Test getting a nonexistent checkpoint
checkpointNE := newFakeCheckpointV1("NE", nil, false)
err = manager.GetCheckpoint("key1", checkpointNE)
assert.Error(t, err)
}

View File

@ -0,0 +1,26 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["checksum.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum",
visibility = ["//visibility:public"],
deps = [
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
"//pkg/util/hash:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,46 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package checksum
import (
"hash/fnv"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
hashutil "k8s.io/kubernetes/pkg/util/hash"
)
// Checksum is the checksum of checkpoint data, stored alongside the data
type Checksum uint64
// Verify verifies that the stored checksum matches the checksum calculated from data
func (cs Checksum) Verify(data interface{}) error {
if cs != New(data) {
return errors.ErrCorruptCheckpoint
}
return nil
}
// New returns the Checksum of the given checkpoint data
func New(data interface{}) Checksum {
return Checksum(getChecksum(data))
}
// getChecksum returns the calculated checksum of the checkpoint data
func getChecksum(data interface{}) uint64 {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, data)
return uint64(hash.Sum32())
}

View File

@ -7,8 +7,8 @@ load(
go_library(
name = "go_default_library",
srcs = ["util.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
srcs = ["errors.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors",
)
filegroup(

View File

@ -1,5 +1,5 @@
/*
Copyright 2015 The Kubernetes Authors.
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -14,5 +14,12 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
// Package rkt contains the Containerruntime interface implementation for rkt.
package rkt // import "k8s.io/kubernetes/pkg/kubelet/rkt"
package errors
import "fmt"
// ErrCorruptCheckpoint error is reported when checksum does not match
var ErrCorruptCheckpoint = fmt.Errorf("checkpoint is corrupted")
// ErrCheckpointNotFound is reported when checkpoint is not found for a given key
var ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")

View File

@ -7,12 +7,8 @@ load(
go_library(
name = "go_default_library",
srcs = [
"gpu_manager_stub.go",
"types.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/gpu",
deps = ["//vendor/k8s.io/api/core/v1:go_default_library"],
srcs = ["util.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing",
)
filegroup(
@ -26,7 +22,7 @@ filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/kubelet/gpu/nvidia:all-srcs",
"//pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,23 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["types.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1",
visibility = ["//visibility:public"],
deps = ["//pkg/kubelet/checkpointmanager/checksum:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,62 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"encoding/json"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
type protocol string
// PortMapping is the port mapping configuration of a sandbox.
type PortMapping struct {
// protocol of the port mapping.
Protocol *protocol
// Port number within the container.
ContainerPort *int32
// Port number on the host.
HostPort *int32
}
// Data contains all types of data that can be stored in the checkpoint.
type Data struct {
PortMappings []*PortMapping `json:"port_mappings,omitempty"`
HostNetwork bool `json:"host_network,omitempty"`
}
// CheckpointData is a sample structure used in test cases for checkpointing
type CheckpointData struct {
Version string
Name string
Data *Data
Checksum checksum.Checksum
}
func (cp *CheckpointData) MarshalCheckpoint() ([]byte, error) {
cp.Checksum = checksum.New(*cp.Data)
return json.Marshal(*cp)
}
func (cp *CheckpointData) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}
func (cp *CheckpointData) VerifyChecksum() error {
return cp.Checksum.Verify(*cp.Data)
}
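A minimal round trip through this test-only format, assuming the import path from the BUILD file above; the port values are illustrative:

package main

import (
	"fmt"

	v1 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1"
)

func main() {
	port := int32(8080)
	cp := &v1.CheckpointData{
		Version: "v1",
		Name:    "example",
		Data: &v1.Data{
			PortMappings: []*v1.PortMapping{{ContainerPort: &port, HostPort: &port}},
			HostNetwork:  true,
		},
	}
	// MarshalCheckpoint stamps the checksum before serializing.
	blob, err := cp.MarshalCheckpoint()
	if err != nil {
		panic(err)
	}
	restored := &v1.CheckpointData{}
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		panic(err)
	}
	// VerifyChecksum recomputes the hash over Data and compares it
	// against the stored value, catching on-disk corruption.
	fmt.Println("verified:", restored.VerifyChecksum() == nil)
}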

View File

@ -27,10 +27,12 @@ type MemStore struct {
sync.Mutex
}
// NewMemStore returns an instance of MemStore
func NewMemStore() *MemStore {
return &MemStore{mem: make(map[string][]byte)}
}
// Write writes the data to the store
func (mstore *MemStore) Write(key string, data []byte) error {
mstore.Lock()
defer mstore.Unlock()
@ -38,6 +40,7 @@ func (mstore *MemStore) Write(key string, data []byte) error {
return nil
}
// Read returns data read from store
func (mstore *MemStore) Read(key string) ([]byte, error) {
mstore.Lock()
defer mstore.Unlock()
@ -48,6 +51,7 @@ func (mstore *MemStore) Read(key string) ([]byte, error) {
return data, nil
}
// Delete deletes data from the store
func (mstore *MemStore) Delete(key string) error {
mstore.Lock()
defer mstore.Unlock()
@ -55,6 +59,7 @@ func (mstore *MemStore) Delete(key string) error {
return nil
}
// List returns all the keys from the store
func (mstore *MemStore) List() ([]string, error) {
mstore.Lock()
defer mstore.Unlock()

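A usage sketch for the in-memory store; the import path below is an assumption based on the surrounding BUILD changes:

package main

import (
	"fmt"

	utilstore "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing"
)

func main() {
	store := utilstore.NewMemStore()
	// Write a blob under a key, read it back, then clean up.
	if err := store.Write("pod-123", []byte("checkpoint blob")); err != nil {
		panic(err)
	}
	data, err := store.Read("pod-123")
	if err != nil {
		panic(err)
	}
	fmt.Printf("read %d bytes\n", len(data))
	keys, _ := store.List()
	fmt.Println("keys:", keys)
	_ = store.Delete("pod-123")
}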
View File

@ -74,7 +74,7 @@ func MakeTransport(config *KubeletClientConfig) (http.RoundTripper, error) {
rt := http.DefaultTransport
if config.Dial != nil || tlsConfig != nil {
rt = utilnet.SetOldTransportDefaults(&http.Transport{
Dial: config.Dial,
DialContext: config.Dial,
TLSClientConfig: tlsConfig,
})
}
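The same Dial-to-DialContext move in plain net/http terms, a sketch only; the kubelet client config itself is not shown here:

package main

import (
	"context"
	"net"
	"net/http"
	"time"
)

func main() {
	dialer := &net.Dialer{Timeout: 30 * time.Second}
	// DialContext supersedes the deprecated Transport.Dial field: it lets
	// an in-flight dial be cancelled when the caller's context is done.
	rt := &http.Transport{DialContext: dialer.DialContext}
	client := &http.Client{Transport: rt}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	req, err := http.NewRequest("GET", "https://example.com/healthz", nil)
	if err != nil {
		return
	}
	resp, err := client.Do(req.WithContext(ctx))
	if err != nil {
		return
	}
	resp.Body.Close()
}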

View File

@ -91,9 +91,10 @@ go_library(
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/status:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
@ -140,7 +141,6 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
@ -188,6 +188,7 @@ go_test(
"container_manager_linux_test.go",
"helpers_linux_test.go",
"node_container_manager_test.go",
"pod_container_manager_linux_test.go",
],
"//conditions:default": [],
}),
@ -200,6 +201,7 @@ go_test(
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
],
"//conditions:default": [],
}),

View File

@ -53,74 +53,79 @@ const (
// which is what is expected when interacting with libcontainer
var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
// For example, it will return /Burstable.slice/Burstable-pod_123_456.slice in above scenario.
func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
name := string(cgroupName)
result := ""
if name != "" && name != "/" {
parts := strings.Split(name, "/")
results := []string{}
for _, part := range parts {
// ignore leading stuff
if part == "" {
continue
}
// detect if we are given a systemd style name.
// if so, we do not want to do double encoding.
if IsSystemdStyleName(part) {
part = strings.TrimSuffix(part, systemdSuffix)
separatorIndex := strings.LastIndex(part, "-")
if separatorIndex >= 0 && separatorIndex < len(part) {
part = part[separatorIndex+1:]
}
} else {
// systemd treats - as a step in the hierarchy, we convert all - to _
part = strings.Replace(part, "-", "_", -1)
}
results = append(results, part)
var RootCgroupName = CgroupName([]string{})
// NewCgroupName composes a new cgroup name.
// Use RootCgroupName as base to start at the root.
// This function does a basic check for invalid characters in the name.
func NewCgroupName(base CgroupName, components ...string) CgroupName {
for _, component := range components {
// Forbid using "_" in internal names. When remapping internal
// names to systemd cgroup driver, we want to remap "-" => "_",
// so we forbid "_" so that we can always reverse the mapping.
if strings.Contains(component, "/") || strings.Contains(component, "_") {
panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component))
}
// each part is appended with systemd style -
result = strings.Join(results, "-")
} else {
// root converts to -
result = "-"
}
// always have a .slice suffix
if !IsSystemdStyleName(result) {
result = result + systemdSuffix
return CgroupName(append(base, components...))
}
func escapeSystemdCgroupName(part string) string {
return strings.Replace(part, "-", "_", -1)
}
func unescapeSystemdCgroupName(part string) string {
return strings.Replace(part, "_", "-", -1)
}
// cgroupName.ToSystemd converts the internal cgroup name to a systemd name.
// For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
// This function always expands the systemd name into the cgroupfs form. If only
// the last part is needed, use path.Base(...) on it to discard the rest.
func (cgroupName CgroupName) ToSystemd() string {
if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") {
return "/"
}
newparts := []string{}
for _, part := range cgroupName {
part = escapeSystemdCgroupName(part)
newparts = append(newparts, part)
}
// if the caller desired the result in cgroupfs format...
if outputToCgroupFs {
var err error
result, err = cgroupsystemd.ExpandSlice(result)
if err != nil {
panic(fmt.Errorf("error adapting cgroup name, input: %v, err: %v", name, err))
}
result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix)
if err != nil {
// Should never happen...
panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err))
}
return result
}
// ConvertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice
// NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those
// code areas could use something from libcontainer if we get this style function upstream.
func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
// TODO: see if libcontainer systemd implementation could use something similar, and if so, move
// this function up to that library. At that time, it would most likely do validation specific to systemd
// above and beyond the simple assumption here that the base of the path encodes the hierarchy
// per systemd convention.
return path.Base(cgroupfsName), nil
func ParseSystemdToCgroupName(name string) CgroupName {
driverName := path.Base(name)
driverName = strings.TrimSuffix(driverName, systemdSuffix)
parts := strings.Split(driverName, "-")
result := []string{}
for _, part := range parts {
result = append(result, unescapeSystemdCgroupName(part))
}
return CgroupName(result)
}
func (cgroupName CgroupName) ToCgroupfs() string {
return "/" + path.Join(cgroupName...)
}
func ParseCgroupfsToCgroupName(name string) CgroupName {
components := strings.Split(strings.TrimPrefix(name, "/"), "/")
if len(components) == 1 && components[0] == "" {
components = []string{}
}
return CgroupName(components)
}
func IsSystemdStyleName(name string) bool {
if strings.HasSuffix(name, systemdSuffix) {
return true
}
return false
return strings.HasSuffix(name, systemdSuffix)
}
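To make the escaping rules concrete, a self-contained sketch that reimplements the helpers above instead of importing the cm package:

package main

import (
	"fmt"
	"path"
	"strings"
)

func escape(part string) string   { return strings.Replace(part, "-", "_", -1) }
func unescape(part string) string { return strings.Replace(part, "_", "-", -1) }

func main() {
	components := []string{"kubepods", "burstable", "pod1234-abcd"}

	// Systemd form: escape "-" => "_" in each component, then join with "-".
	escaped := make([]string, 0, len(components))
	for _, c := range components {
		escaped = append(escaped, escape(c))
	}
	leaf := strings.Join(escaped, "-") + ".slice"
	fmt.Println(leaf) // kubepods-burstable-pod1234_abcd.slice

	// Cgroupfs form: the raw components joined as a path.
	fmt.Println("/" + path.Join(components...)) // /kubepods/burstable/pod1234-abcd

	// Parsing reverses the leaf name back into components.
	parts := strings.Split(strings.TrimSuffix(leaf, ".slice"), "-")
	for i, p := range parts {
		parts[i] = unescape(p)
	}
	fmt.Println(parts) // [kubepods burstable pod1234-abcd]
}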
// libcontainerAdapter provides a simplified interface to libcontainer based on libcontainer type.
@ -156,34 +161,6 @@ func (l *libcontainerAdapter) newManager(cgroups *libcontainerconfigs.Cgroup, pa
return nil, fmt.Errorf("invalid cgroup manager configuration")
}
func (l *libcontainerAdapter) revertName(name string) CgroupName {
if l.cgroupManagerType != libcontainerSystemd {
return CgroupName(name)
}
return CgroupName(RevertFromSystemdToCgroupStyleName(name))
}
func RevertFromSystemdToCgroupStyleName(name string) string {
driverName, err := ConvertCgroupFsNameToSystemd(name)
if err != nil {
panic(err)
}
driverName = strings.TrimSuffix(driverName, systemdSuffix)
driverName = strings.Replace(driverName, "-", "/", -1)
driverName = strings.Replace(driverName, "_", "-", -1)
return driverName
}
// adaptName converts a CgroupName identifier to a driver specific conversion value.
// if outputToCgroupFs is true, the result is returned in the cgroupfs format rather than the driver specific form.
func (l *libcontainerAdapter) adaptName(cgroupName CgroupName, outputToCgroupFs bool) string {
if l.cgroupManagerType != libcontainerSystemd {
name := string(cgroupName)
return name
}
return ConvertCgroupNameToSystemd(cgroupName, outputToCgroupFs)
}
// CgroupSubsystems holds information about the mounted cgroup subsystems
type CgroupSubsystems struct {
// Cgroup subsystem mounts.
@ -223,13 +200,22 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager {
}
// Name converts the cgroup to the driver specific value in cgroupfs form.
// This always returns a valid cgroupfs path even when systemd driver is in use!
func (m *cgroupManagerImpl) Name(name CgroupName) string {
return m.adapter.adaptName(name, true)
if m.adapter.cgroupManagerType == libcontainerSystemd {
return name.ToSystemd()
} else {
return name.ToCgroupfs()
}
}
// CgroupName converts the literal cgroupfs name on the host to an internal identifier.
func (m *cgroupManagerImpl) CgroupName(name string) CgroupName {
return m.adapter.revertName(name)
if m.adapter.cgroupManagerType == libcontainerSystemd {
return ParseSystemdToCgroupName(name)
} else {
return ParseCgroupfsToCgroupName(name)
}
}
// buildCgroupPaths builds a path to each cgroup subsystem for the specified name.
@ -242,6 +228,22 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string
return cgroupPaths
}
// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
// and split it appropriately, using essentially the logic below.
// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
// for systemd was never introduced.
func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) {
dir, base := path.Split(cgroupName.ToSystemd())
if dir == "/" {
dir = "-.slice"
} else {
dir = path.Base(dir)
}
cgroupConfig.Parent = dir
cgroupConfig.Name = base
}
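A small sketch of how that Parent/Name split behaves for a typical pod slice, using only the standard path package:

package main

import (
	"fmt"
	"path"
)

func main() {
	full := "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod123.slice"
	dir, base := path.Split(full)
	// path.Split leaves the trailing slash on dir; path.Base trims it,
	// so the parent collapses to the nearest ancestor slice.
	parent := path.Base(dir)
	if dir == "/" {
		parent = "-.slice" // systemd's name for the root slice
	}
	fmt.Println("Parent:", parent) // kubepods-burstable.slice
	fmt.Println("Name:  ", base)   // kubepods-burstable-pod123.slice
}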
// Exists checks if all subsystem cgroups already exist
func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
// Get map of all cgroup paths on the system for the particular cgroup
@ -278,23 +280,13 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
// we take the location in traditional cgroupfs format.
abstractCgroupFsName := string(cgroupConfig.Name)
abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
abstractName := CgroupName(path.Base(abstractCgroupFsName))
driverParent := m.adapter.adaptName(abstractParent, false)
driverName := m.adapter.adaptName(abstractName, false)
// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{}
// libcontainer consumes a different field and expects a different syntax
// depending on the cgroup driver in use, so we need this conditional here.
if m.adapter.cgroupManagerType == libcontainerSystemd {
driverName = m.adapter.adaptName(cgroupConfig.Name, false)
}
// Initialize libcontainer's cgroup config with driver specific naming.
libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
Name: driverName,
Parent: driverParent,
updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
} else {
libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
}
manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths)
@ -418,26 +410,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
// we take the location in traditional cgroupfs format.
abstractCgroupFsName := string(cgroupConfig.Name)
abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
abstractName := CgroupName(path.Base(abstractCgroupFsName))
driverParent := m.adapter.adaptName(abstractParent, false)
driverName := m.adapter.adaptName(abstractName, false)
// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
if m.adapter.cgroupManagerType == libcontainerSystemd {
driverName = m.adapter.adaptName(cgroupConfig.Name, false)
}
// Initialize libcontainer's cgroup config
libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
Name: driverName,
Parent: driverParent,
Resources: resources,
Paths: cgroupPaths,
}
// libcontainer consumes a different field and expects a different syntax
// depending on the cgroup driver in use, so we need this conditional here.
if m.adapter.cgroupManagerType == libcontainerSystemd {
updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
} else {
libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
@ -456,25 +439,18 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
metrics.CgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start))
}()
// we take the location in traditional cgroupfs format.
abstractCgroupFsName := string(cgroupConfig.Name)
abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
abstractName := CgroupName(path.Base(abstractCgroupFsName))
driverParent := m.adapter.adaptName(abstractParent, false)
driverName := m.adapter.adaptName(abstractName, false)
// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
if m.adapter.cgroupManagerType == libcontainerSystemd {
driverName = m.adapter.adaptName(cgroupConfig.Name, false)
}
resources := m.toResources(cgroupConfig.ResourceParameters)
// Initialize libcontainer's cgroup config with driver specific naming.
libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
Name: driverName,
Parent: driverParent,
Resources: resources,
}
// libcontainer consumes a different field and expects a different syntax
// depending on the cgroup driver in use, so we need this conditional here.
if m.adapter.cgroupManagerType == libcontainerSystemd {
updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
} else {
libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit

View File

@ -18,119 +18,105 @@ limitations under the License.
package cm
import "testing"
import (
"path"
"testing"
)
func TestLibcontainerAdapterAdaptToSystemd(t *testing.T) {
func TestCgroupNameToSystemdBasename(t *testing.T) {
testCases := []struct {
input string
input CgroupName
expected string
}{
{
input: "/",
expected: "-.slice",
input: RootCgroupName,
expected: "/",
},
{
input: "/system.slice",
input: NewCgroupName(RootCgroupName, "system"),
expected: "system.slice",
},
{
input: "/system.slice/Burstable",
input: NewCgroupName(RootCgroupName, "system", "Burstable"),
expected: "system-Burstable.slice",
},
{
input: "/Burstable.slice/Burstable-pod_123.slice",
input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"),
expected: "Burstable-pod_123.slice",
},
{
input: "/test.slice/test-a.slice/test-a-b.slice",
input: NewCgroupName(RootCgroupName, "test", "a", "b"),
expected: "test-a-b.slice",
},
{
input: "/test.slice/test-a.slice/test-a-b.slice/Burstable",
input: NewCgroupName(RootCgroupName, "test", "a", "b", "Burstable"),
expected: "test-a-b-Burstable.slice",
},
{
input: "/Burstable",
input: NewCgroupName(RootCgroupName, "Burstable"),
expected: "Burstable.slice",
},
{
input: "/Burstable/pod_123",
expected: "Burstable-pod_123.slice",
},
{
input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d",
input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"),
expected: "BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice",
},
}
for _, testCase := range testCases {
f := newLibcontainerAdapter(libcontainerSystemd)
if actual := f.adaptName(CgroupName(testCase.input), false); actual != testCase.expected {
if actual := path.Base(testCase.input.ToSystemd()); actual != testCase.expected {
t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
}
}
}
func TestLibcontainerAdapterAdaptToSystemdAsCgroupFs(t *testing.T) {
func TestCgroupNameToSystemd(t *testing.T) {
testCases := []struct {
input string
input CgroupName
expected string
}{
{
input: "/",
input: RootCgroupName,
expected: "/",
},
{
input: "/Burstable",
input: NewCgroupName(RootCgroupName, "Burstable"),
expected: "/Burstable.slice",
},
{
input: "/Burstable/pod_123",
input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"),
expected: "/Burstable.slice/Burstable-pod_123.slice",
},
{
input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d",
input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"),
expected: "/BestEffort.slice/BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice",
},
{
input: "/kubepods",
input: NewCgroupName(RootCgroupName, "kubepods"),
expected: "/kubepods.slice",
},
}
for _, testCase := range testCases {
f := newLibcontainerAdapter(libcontainerSystemd)
if actual := f.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
if actual := testCase.input.ToSystemd(); actual != testCase.expected {
t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
}
}
}
func TestLibcontainerAdapterNotAdaptToSystemd(t *testing.T) {
cgroupfs := newLibcontainerAdapter(libcontainerCgroupfs)
otherAdatper := newLibcontainerAdapter(libcontainerCgroupManagerType("test"))
func TestCgroupNameToCgroupfs(t *testing.T) {
testCases := []struct {
input string
input CgroupName
expected string
}{
{
input: "/",
input: RootCgroupName,
expected: "/",
},
{
input: "/Burstable",
input: NewCgroupName(RootCgroupName, "Burstable"),
expected: "/Burstable",
},
{
input: "",
expected: "",
},
}
for _, testCase := range testCases {
if actual := cgroupfs.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
}
if actual := otherAdatper.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
if actual := testCase.input.ToCgroupfs(); actual != testCase.expected {
t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
}
}

View File

@ -63,25 +63,35 @@ func (m *unsupportedCgroupManager) Pids(_ CgroupName) []int {
}
func (m *unsupportedCgroupManager) CgroupName(name string) CgroupName {
return ""
return CgroupName([]string{})
}
func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error {
return nil
}
func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
return "", nil
var RootCgroupName = CgroupName([]string{})
func NewCgroupName(base CgroupName, components ...string) CgroupName {
return CgroupName(append(base, components...))
}
func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
func (cgroupName CgroupName) ToSystemd() string {
return ""
}
func RevertFromSystemdToCgroupStyleName(name string) string {
func ParseSystemdToCgroupName(name string) CgroupName {
return nil
}
func (cgroupName CgroupName) ToCgroupfs() string {
return ""
}
func ParseCgroupfsToCgroupName(name string) CgroupName {
return nil
}
func IsSystemdStyleName(name string) bool {
return false
}

View File

@ -28,7 +28,7 @@ import (
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"fmt"
"strconv"
@ -107,10 +107,11 @@ type NodeConfig struct {
KubeletRootDir string
ProtectKernelDefaults bool
NodeAllocatableConfig
ExperimentalQOSReserved map[v1.ResourceName]int64
QOSReserved map[v1.ResourceName]int64
ExperimentalCPUManagerPolicy string
ExperimentalCPUManagerReconcilePeriod time.Duration
ExperimentalPodPidsLimit int64
EnforceCPULimits bool
}
type NodeAllocatableConfig struct {

View File

@ -52,7 +52,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
utilfile "k8s.io/kubernetes/pkg/util/file"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/oom"
@ -123,7 +123,7 @@ type containerManagerImpl struct {
capacity v1.ResourceList
// Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under.
// This path include a top level container for enforcing Node Allocatable.
cgroupRoot string
cgroupRoot CgroupName
// Event recorder interface.
recorder record.EventRecorder
// Interface for QoS cgroup management
@ -223,7 +223,8 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
}
capacity = cadvisor.CapacityFromMachineInfo(machineInfo)
cgroupRoot := nodeConfig.CgroupRoot
// Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName
cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot)
cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
// Check if Cgroup-root actually exists on the node
if nodeConfig.CgroupsPerQOS {
@ -236,13 +237,13 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
// of note, we always use the cgroupfs driver when performing this check since
// the input is provided in that format.
// this is important because we do not want any name conversion to occur.
if !cgroupManager.Exists(CgroupName(cgroupRoot)) {
if !cgroupManager.Exists(cgroupRoot) {
return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err)
}
glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot)
// Include the top level cgroup for enforcing node allocatable into cgroup-root.
// This way, all sub modules can avoid having to understand the concept of node allocatable.
cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName)
cgroupRoot = NewCgroupName(cgroupRoot, defaultNodeAllocatableCgroupName)
}
glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig)
@ -301,10 +302,11 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
subsystems: cm.subsystems,
cgroupManager: cm.cgroupManager,
podPidsLimit: cm.ExperimentalPodPidsLimit,
enforceCPULimits: cm.EnforceCPULimits,
}
}
return &podContainerManagerNoop{
cgroupRoot: CgroupName(cm.cgroupRoot),
cgroupRoot: cm.cgroupRoot,
}
}
@ -502,7 +504,7 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
// GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods.
func (cm *containerManagerImpl) GetPodCgroupRoot() string {
return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot))
return cm.cgroupManager.Name(cm.cgroupRoot)
}
func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {
@ -858,21 +860,6 @@ func isKernelPid(pid int) bool {
return err != nil
}
// Helper for getting the docker API version.
func getDockerAPIVersion(cadvisor cadvisor.Interface) *utilversion.Version {
versions, err := cadvisor.VersionInfo()
if err != nil {
glog.Errorf("Error requesting cAdvisor VersionInfo: %v", err)
return utilversion.MustParseSemantic("0.0")
}
dockerAPIVersion, err := utilversion.ParseGeneric(versions.DockerAPIVersion)
if err != nil {
glog.Errorf("Error parsing docker version %q: %v", versions.DockerVersion, err)
return utilversion.MustParseSemantic("0.0")
}
return dockerAPIVersion
}
func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
return cm.capacity
}

View File

@ -19,6 +19,7 @@ limitations under the License.
package cm
import (
"errors"
"fmt"
"io/ioutil"
"os"
@ -91,8 +92,36 @@ func (mi *fakeMountInterface) MakeFile(pathname string) error {
return nil
}
func (mi *fakeMountInterface) ExistsPath(pathname string) bool {
return true
func (mi *fakeMountInterface) ExistsPath(pathname string) (bool, error) {
return true, errors.New("not implemented")
}
func (mi *fakeMountInterface) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) {
return "", nil, nil
}
func (mi *fakeMountInterface) CleanSubPaths(_, _ string) error {
return nil
}
func (mi *fakeMountInterface) SafeMakeDir(_, _ string, _ os.FileMode) error {
return nil
}
func (mi *fakeMountInterface) GetMountRefs(pathname string) ([]string, error) {
return nil, errors.New("not implemented")
}
func (mi *fakeMountInterface) GetFSGroup(pathname string) (int64, error) {
return -1, errors.New("not implemented")
}
func (mi *fakeMountInterface) GetSELinuxSupport(pathname string) (bool, error) {
return false, errors.New("not implemented")
}
func (mi *fakeMountInterface) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}
func fakeContainerMgrMountInt() mount.Interface {

View File

@ -20,13 +20,14 @@ import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
type containerManagerStub struct{}
@ -67,7 +68,12 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList
}
func (cm *containerManagerStub) GetCapacity() v1.ResourceList {
return nil
c := v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(0),
resource.BinarySI),
}
return c
}
func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {

View File

@ -18,7 +18,6 @@ package state
import (
"bytes"
"flag"
"fmt"
"io"
"io/ioutil"
@ -69,9 +68,6 @@ func stderrCapture(t *testing.T, f func() State) (bytes.Buffer, State) {
}
func TestFileStateTryRestore(t *testing.T) {
flag.Set("alsologtostderr", "true")
flag.Parse()
testCases := []struct {
description string
stateFileContent string
@ -292,9 +288,6 @@ func TestFileStateTryRestorePanic(t *testing.T) {
}
func TestUpdateStateFile(t *testing.T) {
flag.Set("alsologtostderr", "true")
flag.Parse()
testCases := []struct {
description string
expErr string

View File

@ -56,9 +56,6 @@ func (s *stateMemory) GetDefaultCPUSet() cpuset.CPUSet {
}
func (s *stateMemory) GetCPUSetOrDefault(containerID string) cpuset.CPUSet {
s.RLock()
defer s.RUnlock()
if res, ok := s.GetCPUSet(containerID); ok {
return res
}

View File

@ -15,15 +15,15 @@ go_library(
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
"//pkg/kubelet/cm/devicemanager/checkpoint:go_default_library",
"//pkg/kubelet/config:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/util/store:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/util/filesystem:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@ -40,10 +40,9 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/util/store:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/util/filesystem:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
@ -63,7 +62,10 @@ filegroup(
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
srcs = [
":package-srcs",
"//pkg/kubelet/cm/devicemanager/checkpoint:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,26 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["checkpoint.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint",
visibility = ["//visibility:public"],
deps = [
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,81 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package checkpoint
import (
"encoding/json"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
type DeviceManagerCheckpoint interface {
checkpointmanager.Checkpoint
GetData() ([]PodDevicesEntry, map[string][]string)
}
type PodDevicesEntry struct {
PodUID string
ContainerName string
ResourceName string
DeviceIDs []string
AllocResp []byte
}
// checkpointData struct is used to store pod to device allocation information
// in a checkpoint file.
// TODO: add version control when we need to change checkpoint format.
type checkpointData struct {
PodDeviceEntries []PodDevicesEntry
RegisteredDevices map[string][]string
}
type Data struct {
Data checkpointData
Checksum checksum.Checksum
}
// New returns an instance of DeviceManagerCheckpoint
func New(devEntries []PodDevicesEntry,
devices map[string][]string) DeviceManagerCheckpoint {
return &Data{
Data: checkpointData{
PodDeviceEntries: devEntries,
RegisteredDevices: devices,
},
}
}
// MarshalCheckpoint returns marshalled data
func (cp *Data) MarshalCheckpoint() ([]byte, error) {
cp.Checksum = checksum.New(cp.Data)
return json.Marshal(*cp)
}
// UnmarshalCheckpoint unmarshals the blob back into the checkpoint data
func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}
// VerifyChecksum verifies that the stored checksum matches the one calculated from the checkpoint data
func (cp *Data) VerifyChecksum() error {
return cp.Checksum.Verify(cp.Data)
}
func (cp *Data) GetData() ([]PodDevicesEntry, map[string][]string) {
return cp.Data.PodDeviceEntries, cp.Data.RegisteredDevices
}

View File

@ -17,13 +17,14 @@ limitations under the License.
package devicemanager
import (
"context"
"log"
"net"
"os"
"path"
"sync"
"time"
"golang.org/x/net/context"
"google.golang.org/grpc"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
@ -35,6 +36,7 @@ type Stub struct {
socket string
stop chan interface{}
wg sync.WaitGroup
update chan []*pluginapi.Device
server *grpc.Server
@ -70,7 +72,8 @@ func (m *Stub) SetAllocFunc(f stubAllocFunc) {
m.allocFunc = f
}
// Start starts the gRPC server of the device plugin
// Start starts the gRPC server of the device plugin. Can only
// be called once.
func (m *Stub) Start() error {
err := m.cleanup()
if err != nil {
@ -82,10 +85,14 @@ func (m *Stub) Start() error {
return err
}
m.wg.Add(1)
m.server = grpc.NewServer([]grpc.ServerOption{}...)
pluginapi.RegisterDevicePluginServer(m.server, m)
go m.server.Serve(sock)
go func() {
defer m.wg.Done()
m.server.Serve(sock)
}()
_, conn, err := dial(m.socket)
if err != nil {
return err
@ -96,18 +103,27 @@ func (m *Stub) Start() error {
return nil
}
// Stop stops the gRPC server
// Stop stops the gRPC server. Can be called without a prior Start
// and more than once. Not safe to be called concurrently by different
// goroutines!
func (m *Stub) Stop() error {
if m.server == nil {
return nil
}
m.server.Stop()
close(m.stop)
m.wg.Wait()
m.server = nil
close(m.stop) // This prevents re-starting the server.
return m.cleanup()
}
// Register registers the device plugin for the given resourceName with Kubelet.
func (m *Stub) Register(kubeletEndpoint, resourceName string, preStartContainerFlag bool) error {
conn, err := grpc.Dial(kubeletEndpoint, grpc.WithInsecure(), grpc.WithBlock(),
grpc.WithTimeout(10*time.Second),
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
conn, err := grpc.DialContext(ctx, kubeletEndpoint, grpc.WithInsecure(), grpc.WithBlock(),
grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("unix", addr, timeout)
}))

View File

@ -17,13 +17,13 @@ limitations under the License.
package devicemanager
import (
"context"
"fmt"
"net"
"sync"
"time"
"github.com/golang/glog"
"golang.org/x/net/context"
"google.golang.org/grpc"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
@ -39,6 +39,8 @@ type endpoint interface {
preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error)
getDevices() []pluginapi.Device
callback(resourceName string, added, updated, deleted []pluginapi.Device)
isStopped() bool
stopGracePeriodExpired() bool
}
type endpointImpl struct {
@ -47,6 +49,7 @@ type endpointImpl struct {
socketPath string
resourceName string
stopTime time.Time
devices map[string]pluginapi.Device
mutex sync.Mutex
@ -55,6 +58,7 @@ type endpointImpl struct {
}
// newEndpointImpl creates a new endpoint for the given resourceName.
// This is to be used during normal device plugin registration.
func newEndpointImpl(socketPath, resourceName string, devices map[string]pluginapi.Device, callback monitorCallback) (*endpointImpl, error) {
client, c, err := dial(socketPath)
if err != nil {
@ -74,6 +78,16 @@ func newEndpointImpl(socketPath, resourceName string, devices map[string]plugina
}, nil
}
// newStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set.
// This is to be used during Kubelet restart, before the actual device plugin re-registers.
func newStoppedEndpointImpl(resourceName string, devices map[string]pluginapi.Device) *endpointImpl {
return &endpointImpl{
resourceName: resourceName,
devices: devices,
stopTime: time.Now(),
}
}
func (e *endpointImpl) callback(resourceName string, added, updated, deleted []pluginapi.Device) {
e.cb(resourceName, added, updated, deleted)
}
@ -176,8 +190,30 @@ func (e *endpointImpl) run() {
}
}
func (e *endpointImpl) isStopped() bool {
e.mutex.Lock()
defer e.mutex.Unlock()
return !e.stopTime.IsZero()
}
func (e *endpointImpl) stopGracePeriodExpired() bool {
e.mutex.Lock()
defer e.mutex.Unlock()
return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
}
// used for testing only
func (e *endpointImpl) setStopTime(t time.Time) {
e.mutex.Lock()
defer e.mutex.Unlock()
e.stopTime = t
}
// allocate issues Allocate gRPC call to the device plugin.
func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
if e.isStopped() {
return nil, fmt.Errorf(errEndpointStopped, e)
}
return e.client.Allocate(context.Background(), &pluginapi.AllocateRequest{
ContainerRequests: []*pluginapi.ContainerAllocateRequest{
{DevicesIDs: devs},
@ -187,6 +223,9 @@ func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, err
// preStartContainer issues PreStartContainer gRPC call to the device plugin.
func (e *endpointImpl) preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error) {
if e.isStopped() {
return nil, fmt.Errorf(errEndpointStopped, e)
}
ctx, cancel := context.WithTimeout(context.Background(), pluginapi.KubeletPreStartContainerRPCTimeoutInSecs*time.Second)
defer cancel()
return e.client.PreStartContainer(ctx, &pluginapi.PreStartContainerRequest{
@ -195,13 +234,20 @@ func (e *endpointImpl) preStartContainer(devs []string) (*pluginapi.PreStartCont
}
func (e *endpointImpl) stop() {
e.clientConn.Close()
e.mutex.Lock()
defer e.mutex.Unlock()
if e.clientConn != nil {
e.clientConn.Close()
}
e.stopTime = time.Now()
}
// dial establishes the gRPC communication with the registered device plugin. https://godoc.org/google.golang.org/grpc#Dial
func dial(unixSocketPath string) (pluginapi.DevicePluginClient, *grpc.ClientConn, error) {
c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
grpc.WithTimeout(10*time.Second),
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
c, err := grpc.DialContext(ctx, unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("unix", addr, timeout)
}),
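The same bounded-dial pattern in isolation, sketched against the grpc-go API vendored in this tree; the socket path is illustrative:

package main

import (
	"context"
	"net"
	"time"

	"google.golang.org/grpc"
)

// dialUnix mirrors the pattern above: a 10s context bounds the whole
// blocking dial, replacing the deprecated grpc.WithTimeout option.
func dialUnix(socketPath string) (*grpc.ClientConn, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	return grpc.DialContext(ctx, socketPath,
		grpc.WithInsecure(),
		grpc.WithBlock(),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}),
	)
}

func main() {
	conn, err := dialUnix("/var/lib/kubelet/device-plugins/kubelet.sock")
	if err != nil {
		return
	}
	defer conn.Close()
}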

View File

@ -17,7 +17,7 @@ limitations under the License.
package devicemanager
import (
"encoding/json"
"context"
"fmt"
"net"
"os"
@ -26,7 +26,6 @@ import (
"time"
"github.com/golang/glog"
"golang.org/x/net/context"
"google.golang.org/grpc"
"k8s.io/api/core/v1"
@ -34,12 +33,13 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// ActivePodsFunc is a function that returns a list of pods to reconcile.
@ -59,6 +59,7 @@ type ManagerImpl struct {
mutex sync.Mutex
server *grpc.Server
wg sync.WaitGroup
// activePods is a method for listing active pods on the node
// so the amount of pluginResources requested by existing pods
@ -83,9 +84,9 @@ type ManagerImpl struct {
allocatedDevices map[string]sets.String
// podDevices contains pod to allocated device mapping.
podDevices podDevices
store utilstore.Store
pluginOpts map[string]*pluginapi.DevicePluginOptions
podDevices podDevices
pluginOpts map[string]*pluginapi.DevicePluginOptions
checkpointManager checkpointmanager.CheckpointManager
}
type sourcesReadyStub struct{}
@ -122,11 +123,11 @@ func newManagerImpl(socketPath string) (*ManagerImpl, error) {
// Before that, initializes them to perform no-op operations.
manager.activePods = func() []*v1.Pod { return []*v1.Pod{} }
manager.sourcesReady = &sourcesReadyStub{}
var err error
manager.store, err = utilstore.NewFileStore(dir, utilfs.DefaultFs{})
checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
if err != nil {
return nil, fmt.Errorf("failed to initialize device plugin checkpointing store: %+v", err)
return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
}
manager.checkpointManager = checkpointManager
return manager, nil
}
@ -188,11 +189,6 @@ func (m *ManagerImpl) removeContents(dir string) error {
return nil
}
const (
// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"
)
// checkpointFile returns device plugin checkpoint file path.
func (m *ManagerImpl) checkpointFile() string {
return filepath.Join(m.socketdir, kubeletDeviceManagerCheckpoint)
@ -229,10 +225,14 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
return err
}
m.wg.Add(1)
m.server = grpc.NewServer([]grpc.ServerOption{}...)
pluginapi.RegisterRegistrationServer(m.server, m)
go m.server.Serve(s)
go func() {
defer m.wg.Done()
m.server.Serve(s)
}()
glog.V(2).Infof("Serving device plugin registration server on %q", socketPath)
@ -318,6 +318,8 @@ func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest
}
// Stop is the function that can stop the gRPC server.
// Can be called concurrently, more than once, and is safe to call
// without a prior Start.
func (m *ManagerImpl) Stop() error {
m.mutex.Lock()
defer m.mutex.Unlock()
@ -325,7 +327,12 @@ func (m *ManagerImpl) Stop() error {
e.stop()
}
if m.server == nil {
return nil
}
m.server.Stop()
m.wg.Wait()
m.server = nil
return nil
}
@ -338,6 +345,7 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
// to avoid potential orphaned devices upon re-registration
devices := make(map[string]pluginapi.Device)
for _, device := range old.getDevices() {
device.Health = pluginapi.Unhealthy
devices[device.ID] = device
}
existingDevs = devices
@ -377,18 +385,28 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
go func() {
e.run()
e.stop()
m.mutex.Lock()
if old, ok := m.endpoints[r.ResourceName]; ok && old == e {
glog.V(2).Infof("Delete resource for endpoint %v", e)
delete(m.endpoints, r.ResourceName)
m.markResourceUnhealthy(r.ResourceName)
}
glog.V(2).Infof("Unregistered endpoint %v", e)
m.mutex.Unlock()
}()
}
func (m *ManagerImpl) markResourceUnhealthy(resourceName string) {
glog.V(2).Infof("Mark all resources Unhealthy for resource %s", resourceName)
healthyDevices := sets.NewString()
if _, ok := m.healthyDevices[resourceName]; ok {
healthyDevices = m.healthyDevices[resourceName]
m.healthyDevices[resourceName] = sets.NewString()
}
if _, ok := m.unhealthyDevices[resourceName]; !ok {
m.unhealthyDevices[resourceName] = sets.NewString()
}
m.unhealthyDevices[resourceName] = m.unhealthyDevices[resourceName].Union(healthyDevices)
}
// GetCapacity is expected to be called when Kubelet updates its node status.
// The first returned variable contains the registered device plugin resource capacity.
// The second returned variable contains the registered device plugin resource allocatable.
@ -405,12 +423,20 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
needsUpdateCheckpoint := false
var capacity = v1.ResourceList{}
var allocatable = v1.ResourceList{}
var deletedResources []string
deletedResources := sets.NewString()
m.mutex.Lock()
for resourceName, devices := range m.healthyDevices {
if _, ok := m.endpoints[resourceName]; !ok {
e, ok := m.endpoints[resourceName]
if (ok && e.stopGracePeriodExpired()) || !ok {
// The resources contained in endpoints and (un)healthyDevices
// should always be consistent. Otherwise, we run the risk
// of failing to garbage collect non-existing resources or devices.
if !ok {
glog.Errorf("unexpected: healthyDevices and endpoints are out of sync")
}
delete(m.endpoints, resourceName)
delete(m.healthyDevices, resourceName)
deletedResources = append(deletedResources, resourceName)
deletedResources.Insert(resourceName)
needsUpdateCheckpoint = true
} else {
capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
@ -418,17 +444,14 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
}
}
for resourceName, devices := range m.unhealthyDevices {
if _, ok := m.endpoints[resourceName]; !ok {
e, ok := m.endpoints[resourceName]
if (ok && e.stopGracePeriodExpired()) || !ok {
if !ok {
glog.Errorf("unexpected: unhealthyDevices and endpoints are out of sync")
}
delete(m.endpoints, resourceName)
delete(m.unhealthyDevices, resourceName)
alreadyDeleted := false
for _, name := range deletedResources {
if name == resourceName {
alreadyDeleted = true
}
}
if !alreadyDeleted {
deletedResources = append(deletedResources, resourceName)
}
deletedResources.Insert(resourceName)
needsUpdateCheckpoint = true
} else {
capacityCount := capacity[v1.ResourceName(resourceName)]
@ -441,36 +464,22 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
if needsUpdateCheckpoint {
m.writeCheckpoint()
}
return capacity, allocatable, deletedResources
}
// checkpointData struct is used to store pod to device allocation information
// and registered device information in a checkpoint file.
// TODO: add version control when we need to change checkpoint format.
type checkpointData struct {
PodDeviceEntries []podDevicesCheckpointEntry
RegisteredDevices map[string][]string
return capacity, allocatable, deletedResources.UnsortedList()
}
// Checkpoints device to container allocation information to disk.
func (m *ManagerImpl) writeCheckpoint() error {
m.mutex.Lock()
data := checkpointData{
PodDeviceEntries: m.podDevices.toCheckpointData(),
RegisteredDevices: make(map[string][]string),
}
registeredDevs := make(map[string][]string)
for resource, devices := range m.healthyDevices {
data.RegisteredDevices[resource] = devices.UnsortedList()
registeredDevs[resource] = devices.UnsortedList()
}
data := checkpoint.New(m.podDevices.toCheckpointData(),
registeredDevs)
m.mutex.Unlock()
dataJSON, err := json.Marshal(data)
err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
if err != nil {
return err
}
err = m.store.Write(kubeletDeviceManagerCheckpoint, dataJSON)
if err != nil {
return fmt.Errorf("failed to write deviceplugin checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
return fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
}
return nil
}
@ -478,29 +487,28 @@ func (m *ManagerImpl) writeCheckpoint() error {
// Reads device to container allocation information from disk, and populates
// m.allocatedDevices accordingly.
func (m *ManagerImpl) readCheckpoint() error {
content, err := m.store.Read(kubeletDeviceManagerCheckpoint)
registeredDevs := make(map[string][]string)
devEntries := make([]checkpoint.PodDevicesEntry, 0)
cp := checkpoint.New(devEntries, registeredDevs)
err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
if err != nil {
if err == utilstore.ErrKeyNotFound {
if err == errors.ErrCheckpointNotFound {
glog.Warningf("Failed to retrieve checkpoint for %q: %v", kubeletDeviceManagerCheckpoint, err)
return nil
}
return fmt.Errorf("failed to read checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
return err
}
glog.V(4).Infof("Read checkpoint file %s\n", kubeletDeviceManagerCheckpoint)
var data checkpointData
if err := json.Unmarshal(content, &data); err != nil {
return fmt.Errorf("failed to unmarshal deviceplugin checkpoint data: %v", err)
}
m.mutex.Lock()
defer m.mutex.Unlock()
m.podDevices.fromCheckpointData(data.PodDeviceEntries)
podDevices, registeredDevs := cp.GetData()
m.podDevices.fromCheckpointData(podDevices)
m.allocatedDevices = m.podDevices.devices()
for resource, devices := range data.RegisteredDevices {
// TODO: Support Checkpointing for unhealthy devices as well
for resource := range registeredDevs {
// During start up, creates empty healthyDevices list so that the resource capacity
// will stay zero till the corresponding device plugin re-registers.
m.healthyDevices[resource] = sets.NewString()
for _, dev := range devices {
m.healthyDevices[resource].Insert(dev)
}
m.unhealthyDevices[resource] = sets.NewString()
m.endpoints[resource] = newStoppedEndpointImpl(resource, make(map[string]pluginapi.Device))
}
return nil
}
@ -688,6 +696,8 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
return m.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name), nil
}
// callPreStartContainerIfNeeded issues a PreStartContainer gRPC call for a device plugin resource
// with the PreStartRequired option set.
func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource string) error {
m.mutex.Lock()
opts, ok := m.pluginOpts[resource]

View File

@ -21,7 +21,7 @@ import (
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// ManagerStub provides a simple stub implementation for the Device Manager.

View File

@ -17,7 +17,6 @@ limitations under the License.
package devicemanager
import (
"flag"
"fmt"
"io/ioutil"
"os"
@ -34,10 +33,9 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
const (
@ -69,6 +67,29 @@ func TestNewManagerImplStart(t *testing.T) {
defer os.RemoveAll(socketDir)
m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {}, socketName, pluginSocketName)
cleanup(t, m, p)
// Stop should tolerate being called more than once.
cleanup(t, m, p)
}
func TestNewManagerImplStop(t *testing.T) {
socketDir, socketName, pluginSocketName, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
m, err := newManagerImpl(socketName)
require.NoError(t, err)
// No prior Start, but that should be okay.
err = m.Stop()
require.NoError(t, err)
devs := []*pluginapi.Device{
{ID: "Dev1", Health: pluginapi.Healthy},
{ID: "Dev2", Health: pluginapi.Healthy},
}
p := NewDevicePluginStub(devs, pluginSocketName)
// Same here.
err = p.Stop()
require.NoError(t, err)
}
// Tests that the device plugin manager correctly handles registration and re-registration by
@ -192,7 +213,8 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
// Adds three devices for resource1, two healthy and one unhealthy.
// Expects capacity for resource1 to be 2.
resourceName1 := "domain1.com/resource1"
testManager.endpoints[resourceName1] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
e1 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
testManager.endpoints[resourceName1] = e1
callback(resourceName1, devs, []pluginapi.Device{}, []pluginapi.Device{})
capacity, allocatable, removedResources := testManager.GetCapacity()
resource1Capacity, ok := capacity[v1.ResourceName(resourceName1)]
@ -240,7 +262,8 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
// Tests adding another resource.
resourceName2 := "resource2"
testManager.endpoints[resourceName2] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
e2 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
testManager.endpoints[resourceName2] = e2
callback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
capacity, allocatable, removedResources = testManager.GetCapacity()
as.Equal(2, len(capacity))
@ -252,9 +275,9 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
as.Equal(int64(2), resource2Allocatable.Value())
as.Equal(0, len(removedResources))
// Removes resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
// Expires resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
// is removed from capacity and it no longer exists in healthyDevices after the call.
delete(testManager.endpoints, resourceName1)
e1.setStopTime(time.Now().Add(-1*endpointStopGracePeriod - time.Duration(10)*time.Second))
capacity, allocatable, removed := testManager.GetCapacity()
as.Equal([]string{resourceName1}, removed)
_, ok = capacity[v1.ResourceName(resourceName1)]
@ -266,9 +289,49 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
as.False(ok)
_, ok = testManager.unhealthyDevices[resourceName1]
as.False(ok)
fmt.Println("removed: ", removed)
as.Equal(1, len(removed))
_, ok = testManager.endpoints[resourceName1]
as.False(ok)
as.Equal(1, len(testManager.endpoints))
// Stops resourceName2 endpoint. Verifies its stopTime is set, allocate and
// preStartContainer calls return errors.
e2.stop()
as.False(e2.stopTime.IsZero())
_, err = e2.allocate([]string{"Device1"})
reflect.DeepEqual(err, fmt.Errorf(errEndpointStopped, e2))
_, err = e2.preStartContainer([]string{"Device1"})
reflect.DeepEqual(err, fmt.Errorf(errEndpointStopped, e2))
// Marks resourceName2 unhealthy and verifies its capacity/allocatable are
// correctly updated.
testManager.markResourceUnhealthy(resourceName2)
capacity, allocatable, removed = testManager.GetCapacity()
val, ok = capacity[v1.ResourceName(resourceName2)]
as.True(ok)
as.Equal(int64(3), val.Value())
val, ok = allocatable[v1.ResourceName(resourceName2)]
as.True(ok)
as.Equal(int64(0), val.Value())
as.Empty(removed)
// Writes and re-reads checkpoints. Verifies we create a stopped endpoint
// for resourceName2, its capacity is set to zero, and we still consider
// it as a DevicePlugin resource. This makes sure any pod scheduled
// while the capacity change was still propagating to the scheduler is
// properly rejected instead of being incorrectly started.
err = testManager.writeCheckpoint()
as.Nil(err)
testManager.healthyDevices = make(map[string]sets.String)
testManager.unhealthyDevices = make(map[string]sets.String)
err = testManager.readCheckpoint()
as.Nil(err)
as.Equal(1, len(testManager.endpoints))
_, ok = testManager.endpoints[resourceName2]
as.True(ok)
capacity, allocatable, removed = testManager.GetCapacity()
val, ok = capacity[v1.ResourceName(resourceName2)]
as.True(ok)
as.Equal(int64(0), val.Value())
as.Empty(removed)
as.True(testManager.isDevicePluginResource(resourceName2))
}
func constructDevices(devices []string) sets.String {
@ -305,18 +368,19 @@ func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.Cont
func TestCheckpoint(t *testing.T) {
resourceName1 := "domain1.com/resource1"
resourceName2 := "domain2.com/resource2"
as := assert.New(t)
tmpDir, err := ioutil.TempDir("", "checkpoint")
as.Nil(err)
defer os.RemoveAll(tmpDir)
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
as.Nil(err)
testManager := &ManagerImpl{
socketdir: tmpDir,
healthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
endpoints: make(map[string]endpoint),
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
checkpointManager: ckm,
}
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
testManager.podDevices.insert("pod1", "con1", resourceName1,
constructDevices([]string{"dev1", "dev2"}),
@ -414,6 +478,10 @@ func (m *MockEndpoint) allocate(devs []string) (*pluginapi.AllocateResponse, err
return nil, nil
}
func (m *MockEndpoint) isStopped() bool { return false }
func (m *MockEndpoint) stopGracePeriodExpired() bool { return false }
func makePod(limits v1.ResourceList) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -431,20 +499,25 @@ func makePod(limits v1.ResourceList) *v1.Pod {
}
}
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) *ManagerImpl {
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) (*ManagerImpl, error) {
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
testManager := &ManagerImpl{
socketdir: tmpDir,
callback: monitorCallback,
healthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
endpoints: make(map[string]endpoint),
pluginOpts: opts,
podDevices: make(podDevices),
activePods: activePods,
sourcesReady: &sourcesReadyStub{},
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
if err != nil {
return nil, err
}
testManager := &ManagerImpl{
socketdir: tmpDir,
callback: monitorCallback,
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
endpoints: make(map[string]endpoint),
pluginOpts: opts,
podDevices: make(podDevices),
activePods: activePods,
sourcesReady: &sourcesReadyStub{},
checkpointManager: ckm,
}
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
for _, res := range testRes {
testManager.healthyDevices[res.resourceName] = sets.NewString()
for _, dev := range res.devs {
@ -476,7 +549,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
}
}
}
return testManager
return testManager, nil
}
func getTestNodeInfo(allocatable v1.ResourceList) *schedulercache.NodeInfo {
@ -497,7 +570,6 @@ type TestResource struct {
}
func TestPodContainerDeviceAllocation(t *testing.T) {
flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
res1 := TestResource{
resourceName: "domain1.com/resource1",
resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
@ -520,7 +592,8 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
defer os.RemoveAll(tmpDir)
nodeInfo := getTestNodeInfo(v1.ResourceList{})
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
as.Nil(err)
testPods := []*v1.Pod{
makePod(v1.ResourceList{
@ -615,7 +688,8 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
as.Nil(err)
defer os.RemoveAll(tmpDir)
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
as.Nil(err)
podWithPluginResourcesInInitContainers := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -693,14 +767,18 @@ func TestSanitizeNodeAllocatable(t *testing.T) {
as := assert.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
tmpDir, err := ioutil.TempDir("", "checkpoint")
as.Nil(err)
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
as.Nil(err)
testManager := &ManagerImpl{
callback: monitorCallback,
healthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
callback: monitorCallback,
allocatedDevices: make(map[string]sets.String),
healthyDevices: make(map[string]sets.String),
podDevices: make(podDevices),
checkpointManager: ckm,
}
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
// require one of resource1 and one of resource2
testManager.allocatedDevices[resourceName1] = sets.NewString()
testManager.allocatedDevices[resourceName1].Insert(devID1)
@ -747,7 +825,8 @@ func TestDevicePreStartContainer(t *testing.T) {
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
pluginOpts[res1.resourceName] = &pluginapi.DevicePluginOptions{PreStartRequired: true}
testManager := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
as.Nil(err)
ch := make(chan []string, 1)
testManager.endpoints[res1.resourceName] = &MockEndpoint{

View File

@ -21,6 +21,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@ -126,18 +127,9 @@ func (pdev podDevices) devices() map[string]sets.String {
return ret
}
// podDevicesCheckpointEntry is used to record <pod, container> to device allocation information.
type podDevicesCheckpointEntry struct {
PodUID string
ContainerName string
ResourceName string
DeviceIDs []string
AllocResp []byte
}
// Turns podDevices to checkpointData.
func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
var data []podDevicesCheckpointEntry
func (pdev podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
var data []checkpoint.PodDevicesEntry
for podUID, containerDevices := range pdev {
for conName, resources := range containerDevices {
for resource, devices := range resources {
@ -152,7 +144,12 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
continue
}
data = append(data, podDevicesCheckpointEntry{podUID, conName, resource, devIds, allocResp})
data = append(data, checkpoint.PodDevicesEntry{
PodUID: podUID,
ContainerName: conName,
ResourceName: resource,
DeviceIDs: devIds,
AllocResp: allocResp})
}
}
}
@ -160,7 +157,7 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
}
// Populates podDevices from the passed in checkpointData.
func (pdev podDevices) fromCheckpointData(data []podDevicesCheckpointEntry) {
func (pdev podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
for _, entry := range data {
glog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)

View File

@ -17,12 +17,14 @@ limitations under the License.
package devicemanager
import (
"time"
"k8s.io/api/core/v1"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// Manager manages all the Device Plugins running on a node.
@ -86,6 +88,8 @@ const (
errInvalidResourceName = "the ResourceName %q is invalid"
// errEmptyResourceName is the error raised when the resource name field is empty
errEmptyResourceName = "invalid Empty ResourceName"
// errEndpointStopped indicates that the endpoint has been stopped
errEndpointStopped = "endpoint %v has been stopped"
// errBadSocket is the error raised when the registry socket path is not absolute
errBadSocket = "bad socketPath, must be an absolute path:"
@ -96,3 +100,12 @@ const (
// errListAndWatch is the error raised when ListAndWatch ended unsuccessfully
errListAndWatch = "listAndWatch ended unexpectedly for device plugin %s with error %v"
)
// endpointStopGracePeriod indicates the grace period after an endpoint is stopped
// because its device plugin fails. DeviceManager keeps the stopped endpoint in its
// cache during this grace period to cover the time gap for the capacity change to
// take effect.
const endpointStopGracePeriod = time.Duration(5) * time.Minute
// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
const kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"

View File

@ -103,7 +103,7 @@ func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
}
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool) *ResourceConfig {
// sum requests and limits.
reqs, limits := resource.PodRequestsAndLimits(pod)
@ -146,6 +146,11 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
}
}
// quota is not capped when cfs quota is disabled
if !enforceCPULimits {
cpuQuota = int64(-1)
}
// determine the qos class
qosClass := v1qos.GetPodQOS(pod)
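For context on the cpuQuota override above: in the kernel's CFS bandwidth controller, writing -1 to cpu.cfs_quota_us means "no limit", so disabling CPU limit enforcement reduces to overwriting the computed quota. A minimal sketch of that rule, with a hypothetical helper name:

package main

import "fmt"

// cpuQuotaForPod applies the convention used above: when CFS quota
// enforcement is disabled, the quota is forced to -1 (unlimited) while
// cpu.shares continues to provide relative weighting between pods.
func cpuQuotaForPod(computedQuota int64, enforceCPULimits bool) int64 {
	if !enforceCPULimits {
		return -1
	}
	return computedQuota
}

func main() {
	fmt.Println(cpuQuotaForPod(10000, true))  // 10000 (quota enforced)
	fmt.Println(cpuQuotaForPod(10000, false)) // -1 (no CFS throttling)
}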

View File

@ -24,6 +24,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"strconv"
)
// getResourceList returns a ResourceList with the
@ -57,10 +58,12 @@ func TestResourceConfigForPod(t *testing.T) {
guaranteedShares := MilliCPUToShares(100)
guaranteedQuota, guaranteedPeriod := MilliCPUToQuota(100)
memoryQuantity = resource.MustParse("100Mi")
cpuNoLimit := int64(-1)
guaranteedMemory := memoryQuantity.Value()
testCases := map[string]struct {
pod *v1.Pod
expected *ResourceConfig
pod *v1.Pod
expected *ResourceConfig
enforceCPULimits bool
}{
"besteffort": {
pod: &v1.Pod{
@ -72,7 +75,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
},
expected: &ResourceConfig{CpuShares: &minShares},
enforceCPULimits: true,
expected: &ResourceConfig{CpuShares: &minShares},
},
"burstable-no-limits": {
pod: &v1.Pod{
@ -84,7 +88,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
},
expected: &ResourceConfig{CpuShares: &burstableShares},
enforceCPULimits: true,
expected: &ResourceConfig{CpuShares: &burstableShares},
},
"burstable-with-limits": {
pod: &v1.Pod{
@ -96,7 +101,21 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
},
expected: &ResourceConfig{CpuShares: &burstableShares, CpuQuota: &burstableQuota, CpuPeriod: &burstablePeriod, Memory: &burstableMemory},
enforceCPULimits: true,
expected: &ResourceConfig{CpuShares: &burstableShares, CpuQuota: &burstableQuota, CpuPeriod: &burstablePeriod, Memory: &burstableMemory},
},
"burstable-with-limits-no-cpu-enforcement": {
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("200m", "200Mi")),
},
},
},
},
enforceCPULimits: false,
expected: &ResourceConfig{CpuShares: &burstableShares, CpuQuota: &cpuNoLimit, CpuPeriod: &burstablePeriod, Memory: &burstableMemory},
},
"burstable-partial-limits": {
pod: &v1.Pod{
@ -111,7 +130,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
},
expected: &ResourceConfig{CpuShares: &burstablePartialShares},
enforceCPULimits: true,
expected: &ResourceConfig{CpuShares: &burstablePartialShares},
},
"guaranteed": {
pod: &v1.Pod{
@ -123,11 +143,25 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
},
expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedPeriod, Memory: &guaranteedMemory},
enforceCPULimits: true,
expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedPeriod, Memory: &guaranteedMemory},
},
"guaranteed-no-cpu-enforcement": {
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
},
},
},
},
enforceCPULimits: false,
expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedPeriod, Memory: &guaranteedMemory},
},
}
for testName, testCase := range testCases {
actual := ResourceConfigForPod(testCase.pod)
actual := ResourceConfigForPod(testCase.pod, testCase.enforceCPULimits)
if !reflect.DeepEqual(actual.CpuPeriod, testCase.expected.CpuPeriod) {
t.Errorf("unexpected result, test: %v, cpu period not as expected", testName)
}
@ -197,3 +231,82 @@ func TestMilliCPUToQuota(t *testing.T) {
}
}
}
func TestHugePageLimits(t *testing.T) {
Mi := int64(1024 * 1024)
type inputStruct struct {
key string
input string
}
testCases := []struct {
name string
inputs []inputStruct
expected map[int64]int64
}{
{
name: "no valid hugepages",
inputs: []inputStruct{
{
key: "2Mi",
input: "128",
},
},
expected: map[int64]int64{},
},
{
name: "2Mi only",
inputs: []inputStruct{
{
key: v1.ResourceHugePagesPrefix + "2Mi",
input: "128",
},
},
expected: map[int64]int64{2 * Mi: 128},
},
{
name: "2Mi and 4Mi",
inputs: []inputStruct{
{
key: v1.ResourceHugePagesPrefix + "2Mi",
input: "128",
},
{
key: v1.ResourceHugePagesPrefix + strconv.FormatInt(2*Mi, 10),
input: "256",
},
{
key: v1.ResourceHugePagesPrefix + "4Mi",
input: "512",
},
{
key: "4Mi",
input: "1024",
},
},
expected: map[int64]int64{2 * Mi: 384, 4 * Mi: 512},
},
}
for _, testcase := range testCases {
t.Run(testcase.name, func(t *testing.T) {
resourceList := v1.ResourceList{}
for _, input := range testcase.inputs {
value, err := resource.ParseQuantity(input.input)
if err != nil {
t.Fatalf("error in parsing hugepages, value: %s", input.input)
} else {
resourceList[v1.ResourceName(input.key)] = value
}
}
resultValue := HugePageLimits(resourceList)
if !reflect.DeepEqual(testcase.expected, resultValue) {
t.Errorf("unexpected result, expected: %v, actual: %v", testcase.expected, resultValue)
}
})
}
}
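The parsing rule this test pins down is simple: only keys with the hugepages- prefix count, the page size comes from the suffix, and limits for the same page size are summed. A standalone sketch of that aggregation, simplified to take raw byte counts instead of resource.Quantity values:

package main

import (
	"fmt"
	"strings"
)

const hugePagesPrefix = "hugepages-" // mirrors v1.ResourceHugePagesPrefix

// pageSize is a simplified suffix table; the real code parses the suffix as a
// resource.Quantity, which also accepts decimal forms like "2097152".
var pageSize = map[string]int64{"2Mi": 2 << 20, "4Mi": 4 << 20, "1Gi": 1 << 30}

// hugePageLimits keeps only hugepages-prefixed keys and sums the limits
// requested per page size, the same shape the test above expects.
func hugePageLimits(limits map[string]int64) map[int64]int64 {
	result := map[int64]int64{}
	for key, limit := range limits {
		if !strings.HasPrefix(key, hugePagesPrefix) {
			continue // e.g. a bare "4Mi" key is not a hugepages resource
		}
		if size, ok := pageSize[strings.TrimPrefix(key, hugePagesPrefix)]; ok {
			result[size] += limit
		}
	}
	return result
}

func main() {
	fmt.Println(hugePageLimits(map[string]int64{
		"hugepages-2Mi": 384, // 128 + 256 pre-summed, matching the test's expectation
		"hugepages-4Mi": 512,
		"4Mi":           1024, // ignored: no prefix
	}))
	// map[2097152:384 4194304:512]
}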

View File

@ -43,7 +43,7 @@ func MilliCPUToShares(milliCPU int64) int64 {
}
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
func ResourceConfigForPod(pod *v1.Pod, enforceCPULimit bool) *ResourceConfig {
return nil
}

View File

@ -39,9 +39,10 @@ const (
defaultNodeAllocatableCgroupName = "kubepods"
)
// createNodeAllocatableCgroups creates the Node Allocatable cgroup when the CgroupsPerQOS flag is set to true
func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
Name: cm.cgroupRoot,
// The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
ResourceParameters: getCgroupConfig(cm.capacity),
}
@ -70,7 +71,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
Name: cm.cgroupRoot,
ResourceParameters: getCgroupConfig(nodeAllocatable),
}
@ -83,11 +84,12 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
}
// If Node Allocatable is enforced on a node that has not been drained or is updated on an existing node to a lower value,
// existing memory usage across pods might be higher that current Node Allocatable Memory Limits.
// existing memory usage across pods might be higher than current Node Allocatable Memory Limits.
// Pod Evictions are expected to bring down memory usage to below Node Allocatable limits.
// Until evictions happen retry cgroup updates.
// Update limits on non root cgroup-root to be safe since the default limits for CPU can be too low.
if cm.cgroupRoot != "/" {
// Check if cgroupRoot is set to a non-empty value (empty would be the root container)
if len(cm.cgroupRoot) > 0 {
go func() {
for {
err := cm.cgroupManager.Update(cgroupConfig)
@ -104,7 +106,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
// Now apply kube reserved and system reserved limits if required.
if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) {
glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.SystemReservedCgroupName), nc.SystemReserved); err != nil {
message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf(message)
@ -113,7 +115,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
}
if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) {
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.KubeReservedCgroupName), nc.KubeReserved); err != nil {
message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf(message)
@ -124,9 +126,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
}
// enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cName),
Name: cName,
ResourceParameters: getCgroupConfig(rl),
}
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)

View File

@ -49,6 +49,8 @@ type podContainerManagerImpl struct {
cgroupManager CgroupManager
// Maximum number of pids in a pod
podPidsLimit int64
// enforceCPULimits controls whether cfs quota is enforced or not
enforceCPULimits bool
}
// Make sure that podContainerManagerImpl implements the PodContainerManager interface
@ -79,7 +81,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
// Create the pod container
containerConfig := &CgroupConfig{
Name: podContainerName,
ResourceParameters: ResourceConfigForPod(pod),
ResourceParameters: ResourceConfigForPod(pod, m.enforceCPULimits),
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 {
containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit
@ -102,7 +104,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, string) {
podQOS := v1qos.GetPodQOS(pod)
// Get the parent QOS container name
var parentContainer string
var parentContainer CgroupName
switch podQOS {
case v1.PodQOSGuaranteed:
parentContainer = m.qosContainersInfo.Guaranteed
@ -114,13 +116,33 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName,
podContainer := GetPodCgroupNameSuffix(pod.UID)
// Get the absolute path of the cgroup
cgroupName := (CgroupName)(path.Join(parentContainer, podContainer))
cgroupName := NewCgroupName(parentContainer, podContainer)
// Get the literal cgroupfs name
cgroupfsName := m.cgroupManager.Name(cgroupName)
return cgroupName, cgroupfsName
}
// Kill one process ID
func (m *podContainerManagerImpl) killOnePid(pid int) error {
// os.FindProcess never returns an error on POSIX
// https://go-review.googlesource.com/c/go/+/19093
p, _ := os.FindProcess(pid)
if err := p.Kill(); err != nil {
// If the process already exited, that's fine.
if strings.Contains(err.Error(), "process already finished") {
// Hate parsing strings, but
// vendor/github.com/opencontainers/runc/libcontainer/
// also does this.
glog.V(3).Infof("process with pid %v no longer exists", pid)
return nil
} else {
return err
}
}
return nil
}
// Scan through the whole cgroup directory and kill all processes either
// attached to the pod cgroup or to a container cgroup under the pod cgroup
func (m *podContainerManagerImpl) tryKillingCgroupProcesses(podCgroup CgroupName) error {
@ -139,13 +161,8 @@ func (m *podContainerManagerImpl) tryKillingCgroupProcesses(podCgroup CgroupName
}
errlist = []error{}
for _, pid := range pidsToKill {
p, err := os.FindProcess(pid)
if err != nil {
// Process not running anymore, do nothing
continue
}
glog.V(3).Infof("Attempt to kill process with pid: %v", pid)
if err := p.Kill(); err != nil {
if err := m.killOnePid(pid); err != nil {
glog.V(3).Infof("failed to kill process with pid: %v", pid)
errlist = append(errlist, err)
}
@ -182,12 +199,37 @@ func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error {
return m.cgroupManager.ReduceCPULimits(podCgroup)
}
// IsPodCgroup returns true if the literal cgroupfs name corresponds to a pod
func (m *podContainerManagerImpl) IsPodCgroup(cgroupfs string) (bool, types.UID) {
// convert the literal cgroupfs form to the driver specific value
cgroupName := m.cgroupManager.CgroupName(cgroupfs)
qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
basePath := ""
for _, qosContainerName := range qosContainersList {
// a pod cgroup is a direct child of a QoS node, so check if it's a match
if len(cgroupName) == len(qosContainerName)+1 {
basePath = cgroupName[len(qosContainerName)]
}
}
if basePath == "" {
return false, types.UID("")
}
if !strings.HasPrefix(basePath, podCgroupNamePrefix) {
return false, types.UID("")
}
parts := strings.Split(basePath, podCgroupNamePrefix)
if len(parts) != 2 {
return false, types.UID("")
}
return true, types.UID(parts[1])
}
// GetAllPodsFromCgroups scans through all the subsystems of pod cgroups
// Get list of pods whose cgroup still exist on the cgroup mounts
func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
// Map for storing all the found pods on the disk
foundPods := make(map[types.UID]CgroupName)
qosContainersList := [3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
// Scan through all the subsystem mounts
// and through each QoS cgroup directory for each subsystem mount
// If a pod cgroup exists in even a single subsystem mount
@ -195,7 +237,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
for _, val := range m.subsystems.MountPoints {
for _, qosContainerName := range qosContainersList {
// get the subsystems QoS cgroup absolute name
qcConversion := m.cgroupManager.Name(CgroupName(qosContainerName))
qcConversion := m.cgroupManager.Name(qosContainerName)
qc := path.Join(val, qcConversion)
dirInfo, err := ioutil.ReadDir(qc)
if err != nil {
@ -217,7 +259,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
internalPath := m.cgroupManager.CgroupName(cgroupfsPath)
// we only care about the base segment of the converted path since that
// is what we inspect to decide whether it is a pod or not.
basePath := path.Base(string(internalPath))
basePath := internalPath[len(internalPath)-1]
if !strings.Contains(basePath, podCgroupNamePrefix) {
continue
}
@ -257,7 +299,7 @@ func (m *podContainerManagerNoop) EnsureExists(_ *v1.Pod) error {
}
func (m *podContainerManagerNoop) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
return m.cgroupRoot, string(m.cgroupRoot)
return m.cgroupRoot, m.cgroupRoot.ToCgroupfs()
}
func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *v1.Pod) string {
@ -276,3 +318,7 @@ func (m *podContainerManagerNoop) ReduceCPULimits(_ CgroupName) error {
func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
return nil, nil
}
func (m *podContainerManagerNoop) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}

View File

@ -0,0 +1,125 @@
// +build linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"strings"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
func TestIsCgroupPod(t *testing.T) {
qosContainersInfo := QOSContainersInfo{
Guaranteed: RootCgroupName,
Burstable: NewCgroupName(RootCgroupName, strings.ToLower(string(v1.PodQOSBurstable))),
BestEffort: NewCgroupName(RootCgroupName, strings.ToLower(string(v1.PodQOSBestEffort))),
}
podUID := types.UID("123")
testCases := []struct {
input CgroupName
expectedResult bool
expectedUID types.UID
}{
{
input: RootCgroupName,
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.Guaranteed),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.Guaranteed, GetPodCgroupNameSuffix(podUID)),
expectedResult: true,
expectedUID: podUID,
},
{
input: NewCgroupName(qosContainersInfo.Guaranteed, GetPodCgroupNameSuffix(podUID), "container.scope"),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.Burstable),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.Burstable, GetPodCgroupNameSuffix(podUID)),
expectedResult: true,
expectedUID: podUID,
},
{
input: NewCgroupName(qosContainersInfo.Burstable, GetPodCgroupNameSuffix(podUID), "container.scope"),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.BestEffort),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(qosContainersInfo.BestEffort, GetPodCgroupNameSuffix(podUID)),
expectedResult: true,
expectedUID: podUID,
},
{
input: NewCgroupName(qosContainersInfo.BestEffort, GetPodCgroupNameSuffix(podUID), "container.scope"),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(RootCgroupName, "system"),
expectedResult: false,
expectedUID: types.UID(""),
},
{
input: NewCgroupName(RootCgroupName, "system", "kubelet"),
expectedResult: false,
expectedUID: types.UID(""),
},
}
for _, cgroupDriver := range []string{"cgroupfs", "systemd"} {
pcm := &podContainerManagerImpl{
cgroupManager: NewCgroupManager(nil, cgroupDriver),
enforceCPULimits: true,
qosContainersInfo: qosContainersInfo,
}
for _, testCase := range testCases {
// give the right cgroup structure based on driver
cgroupfs := testCase.input.ToCgroupfs()
if cgroupDriver == "systemd" {
cgroupfs = testCase.input.ToSystemd()
}
// check if this is a pod or not with the literal cgroupfs input
result, resultUID := pcm.IsPodCgroup(cgroupfs)
if result != testCase.expectedResult {
t.Errorf("Unexpected result for driver: %v, input: %v, expected: %v, actual: %v", cgroupDriver, testCase.input, testCase.expectedResult, result)
}
if resultUID != testCase.expectedUID {
t.Errorf("Unexpected result for driver: %v, input: %v, expected: %v, actual: %v", cgroupDriver, testCase.input, testCase.expectedUID, resultUID)
}
}
}
}

View File

@ -35,7 +35,7 @@ func (m *podContainerManagerStub) EnsureExists(_ *v1.Pod) error {
}
func (m *podContainerManagerStub) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
return "", ""
return nil, ""
}
func (m *podContainerManagerStub) Destroy(_ CgroupName) error {
@ -49,3 +49,7 @@ func (m *podContainerManagerStub) ReduceCPULimits(_ CgroupName) error {
func (m *podContainerManagerStub) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
return nil, nil
}
func (m *podContainerManagerStub) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}

View File

@ -18,7 +18,6 @@ package cm
import (
"fmt"
"path"
"strings"
"sync"
"time"
@ -60,18 +59,18 @@ type qosContainerManagerImpl struct {
qosReserved map[v1.ResourceName]int64
}
func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) {
func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot CgroupName, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) {
if !nodeConfig.CgroupsPerQOS {
return &qosContainerManagerNoop{
cgroupRoot: CgroupName(cgroupRoot),
cgroupRoot: cgroupRoot,
}, nil
}
return &qosContainerManagerImpl{
subsystems: subsystems,
cgroupManager: cgroupManager,
cgroupRoot: CgroupName(cgroupRoot),
qosReserved: nodeConfig.ExperimentalQOSReserved,
cgroupRoot: cgroupRoot,
qosReserved: nodeConfig.QOSReserved,
}, nil
}
@ -81,23 +80,20 @@ func (m *qosContainerManagerImpl) GetQOSContainersInfo() QOSContainersInfo {
func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceList, activePods ActivePodsFunc) error {
cm := m.cgroupManager
rootContainer := string(m.cgroupRoot)
if !cm.Exists(CgroupName(rootContainer)) {
return fmt.Errorf("root container %s doesn't exist", rootContainer)
rootContainer := m.cgroupRoot
if !cm.Exists(rootContainer) {
return fmt.Errorf("root container %v doesn't exist", rootContainer)
}
// Top level for Qos containers are created only for Burstable
// and Best Effort classes
qosClasses := map[v1.PodQOSClass]string{
v1.PodQOSBurstable: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))),
v1.PodQOSBestEffort: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))),
qosClasses := map[v1.PodQOSClass]CgroupName{
v1.PodQOSBurstable: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))),
v1.PodQOSBestEffort: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))),
}
// Create containers for both qos classes
for qosClass, containerName := range qosClasses {
// get the container's absolute name
absoluteContainerName := CgroupName(containerName)
resourceParameters := &ResourceConfig{}
// the BestEffort QoS class has a statically configured minShares value
if qosClass == v1.PodQOSBestEffort {
@ -107,7 +103,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
// containerConfig object stores the cgroup specifications
containerConfig := &CgroupConfig{
Name: absoluteContainerName,
Name: containerName,
ResourceParameters: resourceParameters,
}
@ -117,7 +113,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
}
// check if it exists
if !cm.Exists(absoluteContainerName) {
if !cm.Exists(containerName) {
if err := cm.Create(containerConfig); err != nil {
return fmt.Errorf("failed to create top level %v QOS cgroup : %v", qosClass, err)
}
@ -279,11 +275,11 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
qosConfigs := map[v1.PodQOSClass]*CgroupConfig{
v1.PodQOSBurstable: {
Name: CgroupName(m.qosContainersInfo.Burstable),
Name: m.qosContainersInfo.Burstable,
ResourceParameters: &ResourceConfig{},
},
v1.PodQOSBestEffort: {
Name: CgroupName(m.qosContainersInfo.BestEffort),
Name: m.qosContainersInfo.BestEffort,
ResourceParameters: &ResourceConfig{},
},
}
@ -300,31 +296,34 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
}
}
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.setMemoryReserve(qosConfigs, percentReserve)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.QOSReserved) {
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.setMemoryReserve(qosConfigs, percentReserve)
}
}
}
updateSuccess := true
for _, config := range qosConfigs {
err := m.cgroupManager.Update(config)
if err != nil {
updateSuccess = false
}
}
if updateSuccess {
glog.V(4).Infof("[ContainerManager]: Updated QoS cgroup configuration")
return nil
}
// If the resource can adjust the ResourceConfig to increase likelihood of
// success, call the adjustment function here. Otherwise, the Update() will
// be called again with the same values.
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.retrySetMemoryReserve(qosConfigs, percentReserve)
updateSuccess := true
for _, config := range qosConfigs {
err := m.cgroupManager.Update(config)
if err != nil {
updateSuccess = false
}
}
if updateSuccess {
glog.V(4).Infof("[ContainerManager]: Updated QoS cgroup configuration")
return nil
}
// If the resource can adjust the ResourceConfig to increase likelihood of
// success, call the adjustment function here. Otherwise, the Update() will
// be called again with the same values.
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.retrySetMemoryReserve(qosConfigs, percentReserve)
}
}
}
@ -336,7 +335,7 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
}
}
glog.V(4).Infof("[ContainerManager]: Updated QoS cgroup configuration on retry")
glog.V(4).Infof("[ContainerManager]: Updated QoS cgroup configuration")
return nil
}

View File

@ -38,7 +38,9 @@ type ResourceConfig struct {
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.
type CgroupName string
// It is specified as a list of strings from its individual components, such as:
// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"}
type CgroupName []string
// CgroupConfig holds the cgroup configuration information.
// This is common object which is used to specify
@ -78,7 +80,7 @@ type CgroupManager interface {
Exists(name CgroupName) bool
// Name returns the literal cgroupfs name on the host after any driver specific conversions.
// We would expect systemd implementation to make appropriate name conversion.
// For example, if we pass /foo/bar
// For example, if we pass {"foo", "bar"}
// then systemd should convert the name to something like
// foo.slice/foo-bar.slice
Name(name CgroupName) string
@ -94,9 +96,9 @@ type CgroupManager interface {
// QOSContainersInfo stores the names of containers per qos
type QOSContainersInfo struct {
Guaranteed string
BestEffort string
Burstable string
Guaranteed CgroupName
BestEffort CgroupName
Burstable CgroupName
}
// PodContainerManager stores and manages pod level containers
@ -122,4 +124,7 @@ type PodContainerManager interface {
// GetAllPodsFromCgroups enumerates the set of pod uids to their associated cgroup based on state of cgroupfs system.
GetAllPodsFromCgroups() (map[types.UID]CgroupName, error)
// IsPodCgroup returns true if the literal cgroupfs name corresponds to a pod
IsPodCgroup(cgroupfs string) (bool, types.UID)
}
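The switch of CgroupName from string to []string above is what makes per-driver name conversion mechanical. The following is only a toy illustration of the naming scheme the Name() comment describes, not the real cgroup manager (which also escapes characters systemd disallows):

package main

import (
	"fmt"
	"path"
	"strings"
)

type CgroupName []string

// ToCgroupfs joins the components into a literal cgroupfs path.
func (c CgroupName) ToCgroupfs() string {
	return "/" + path.Join(c...)
}

// ToSystemd builds nested slice names: each level is the dash-joined prefix
// of all components so far, with a .slice suffix.
func (c CgroupName) ToSystemd() string {
	if len(c) == 0 {
		return "/"
	}
	parts := make([]string, 0, len(c))
	for i := range c {
		parts = append(parts, strings.Join(c[:i+1], "-")+".slice")
	}
	return "/" + path.Join(parts...)
}

func main() {
	name := CgroupName{"foo", "bar"}
	fmt.Println(name.ToCgroupfs()) // /foo/bar
	fmt.Println(name.ToSystemd())  // /foo.slice/foo-bar.slice
}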

View File

@ -58,6 +58,7 @@ go_library(
"//pkg/apis/core/v1:go_default_library",
"//pkg/apis/core/validation:go_default_library",
"//pkg/kubelet/checkpoint:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/types:go_default_library",
@ -80,6 +81,7 @@ go_library(
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/golang.org/x/exp/inotify:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
"//conditions:default": [],
}),
@ -91,6 +93,7 @@ go_test(
"apiserver_test.go",
"common_test.go",
"config_test.go",
"file_test.go",
"http_test.go",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
@ -105,6 +108,8 @@ go_test(
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/v1:go_default_library",
"//pkg/apis/core/validation:go_default_library",
"//pkg/kubelet/checkpoint:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/securitycontext:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",

View File

@ -100,7 +100,7 @@ func getSelfLink(name, namespace string) string {
if len(namespace) == 0 {
namespace = metav1.NamespaceDefault
}
selfLink = fmt.Sprintf("/api/"+legacyscheme.Registry.GroupOrDie(api.GroupName).GroupVersion.Version+"/namespaces/%s/pods/%s", namespace, name)
selfLink = fmt.Sprintf("/api/v1/namespaces/%s/pods/%s", namespace, name)
return selfLink
}

View File

@ -78,7 +78,7 @@ func TestDecodeSinglePod(t *testing.T) {
t.Errorf("expected:\n%#v\ngot:\n%#v\n%s", pod, podOut, string(json))
}
for _, gv := range legacyscheme.Registry.EnabledVersionsForGroup(v1.GroupName) {
for _, gv := range legacyscheme.Scheme.PrioritizedVersionsForGroup(v1.GroupName) {
info, _ := runtime.SerializerInfoForMediaType(legacyscheme.Codecs.SupportedMediaTypes(), "application/yaml")
encoder := legacyscheme.Codecs.EncoderForVersion(info.Serializer, gv)
yaml, err := runtime.Encode(encoder, pod)
@ -144,7 +144,7 @@ func TestDecodePodList(t *testing.T) {
t.Errorf("expected:\n%#v\ngot:\n%#v\n%s", podList, &podListOut, string(json))
}
for _, gv := range legacyscheme.Registry.EnabledVersionsForGroup(v1.GroupName) {
for _, gv := range legacyscheme.Scheme.PrioritizedVersionsForGroup(v1.GroupName) {
info, _ := runtime.SerializerInfoForMediaType(legacyscheme.Codecs.SupportedMediaTypes(), "application/yaml")
encoder := legacyscheme.Codecs.EncoderForVersion(info.Serializer, gv)
yaml, err := runtime.Encode(encoder, podList)

View File

@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/kubelet/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/events"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -64,7 +65,7 @@ type PodConfig struct {
// contains the list of all configured sources
sourcesLock sync.Mutex
sources sets.String
checkpointManager checkpoint.Manager
checkpointManager checkpointmanager.CheckpointManager
}
// NewPodConfig creates an object that can merge many configuration sources into a stream
@ -112,15 +113,20 @@ func (c *PodConfig) Sync() {
// Restore restores pods from the checkpoint path, *once*
func (c *PodConfig) Restore(path string, updates chan<- interface{}) error {
var err error
if c.checkpointManager == nil {
c.checkpointManager = checkpoint.NewCheckpointManager(path)
pods, err := c.checkpointManager.LoadPods()
if err == nil {
updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.RESTORE, Source: kubetypes.ApiserverSource}
}
if c.checkpointManager != nil {
return nil
}
return err
var err error
c.checkpointManager, err = checkpointmanager.NewCheckpointManager(path)
if err != nil {
return err
}
pods, err := checkpoint.LoadPods(c.checkpointManager)
if err != nil {
return err
}
updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.RESTORE, Source: kubetypes.ApiserverSource}
return nil
}
// podStorage manages the current pod state at any point in time and ensures updates
@ -308,6 +314,9 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
}
case kubetypes.RESTORE:
glog.V(4).Infof("Restoring pods for source %s", source)
for _, value := range update.Pods {
restorePods = append(restorePods, value)
}
default:
glog.Warningf("Received invalid update type: %v", update)

View File

@ -17,7 +17,9 @@ limitations under the License.
package config
import (
"io/ioutil"
"math/rand"
"os"
"reflect"
"sort"
"strconv"
@ -30,6 +32,9 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/kubelet/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/securitycontext"
)
@ -85,6 +90,14 @@ func CreatePodUpdate(op kubetypes.PodOperation, source string, pods ...*v1.Pod)
return kubetypes.PodUpdate{Pods: pods, Op: op, Source: source}
}
func createPodConfigTesterByChannel(mode PodConfigNotificationMode, channelName string) (chan<- interface{}, <-chan kubetypes.PodUpdate, *PodConfig) {
eventBroadcaster := record.NewBroadcaster()
config := NewPodConfig(mode, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}))
channel := config.Channel(channelName)
ch := config.Updates()
return channel, ch, config
}
func createPodConfigTester(mode PodConfigNotificationMode) (chan<- interface{}, <-chan kubetypes.PodUpdate, *PodConfig) {
eventBroadcaster := record.NewBroadcaster()
config := NewPodConfig(mode, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}))
@ -413,3 +426,35 @@ func TestPodUpdateLabels(t *testing.T) {
expectPodUpdate(t, ch, CreatePodUpdate(kubetypes.UPDATE, TestSource, pod))
}
func TestPodRestore(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "")
defer os.RemoveAll(tmpDir)
pod := CreateValidPod("api-server", "kube-default")
pod.Annotations = make(map[string]string, 0)
pod.Annotations["kubernetes.io/config.source"] = kubetypes.ApiserverSource
pod.Annotations[core.BootstrapCheckpointAnnotationKey] = "true"
// Create Checkpointer
checkpointManager, err := checkpointmanager.NewCheckpointManager(tmpDir)
if err != nil {
t.Fatalf("failed to initialize checkpoint manager: %v", err)
}
if err := checkpoint.WritePod(checkpointManager, pod); err != nil {
t.Fatalf("Error writing checkpoint for pod: %v", pod.GetName())
}
// Restore checkpoint
channel, ch, config := createPodConfigTesterByChannel(PodConfigNotificationIncremental, kubetypes.ApiserverSource)
if err := config.Restore(tmpDir, channel); err != nil {
t.Fatalf("Restore returned error: %v", err)
}
expectPodUpdate(t, ch, CreatePodUpdate(kubetypes.RESTORE, kubetypes.ApiserverSource, pod))
// Verify Restore only happens once
if err := config.Restore(tmpDir, channel); err != nil {
t.Fatalf("The second restore returned error: %v", err)
}
expectNoPodUpdate(t, ch)
}

View File

@ -30,30 +30,46 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/cache"
api "k8s.io/kubernetes/pkg/apis/core"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
type podEventType int
const (
podAdd podEventType = iota
podModify
podDelete
eventBufferLen = 10
)
type watchEvent struct {
fileName string
eventType podEventType
}
type sourceFile struct {
path string
nodeName types.NodeName
period time.Duration
store cache.Store
fileKeyMapping map[string]string
updates chan<- interface{}
watchEvents chan *watchEvent
}
func NewSourceFile(path string, nodeName types.NodeName, period time.Duration, updates chan<- interface{}) {
// "golang.org/x/exp/inotify" requires a path without trailing "/"
path = strings.TrimRight(path, string(os.PathSeparator))
config := newSourceFile(path, nodeName, updates)
config := newSourceFile(path, nodeName, period, updates)
glog.V(1).Infof("Watching path %q", path)
go wait.Forever(config.run, period)
config.run()
}
func newSourceFile(path string, nodeName types.NodeName, updates chan<- interface{}) *sourceFile {
func newSourceFile(path string, nodeName types.NodeName, period time.Duration, updates chan<- interface{}) *sourceFile {
send := func(objs []interface{}) {
var pods []*v1.Pod
for _, o := range objs {
@ -65,23 +81,40 @@ func newSourceFile(path string, nodeName types.NodeName, updates chan<- interfac
return &sourceFile{
path: path,
nodeName: nodeName,
period: period,
store: store,
fileKeyMapping: map[string]string{},
updates: updates,
watchEvents: make(chan *watchEvent, eventBufferLen),
}
}
func (s *sourceFile) run() {
if err := s.watch(); err != nil {
glog.Errorf("Unable to read manifest path %q: %v", s.path, err)
}
listTicker := time.NewTicker(s.period)
go func() {
for {
select {
case <-listTicker.C:
if err := s.listConfig(); err != nil {
glog.Errorf("Unable to read config path %q: %v", s.path, err)
}
case e := <-s.watchEvents:
if err := s.consumeWatchEvent(e); err != nil {
glog.Errorf("Unable to process watch event: %v", err)
}
}
}
}()
s.startWatch()
}
func (s *sourceFile) applyDefaults(pod *api.Pod, source string) error {
return applyDefaults(pod, source, true, s.nodeName)
}
func (s *sourceFile) resetStoreFromPath() error {
func (s *sourceFile) listConfig() error {
path := s.path
statInfo, err := os.Stat(path)
if err != nil {
@ -158,7 +191,7 @@ func (s *sourceFile) extractFromDir(name string) ([]*v1.Pod, error) {
}
func (s *sourceFile) extractFromFile(filename string) (pod *v1.Pod, err error) {
glog.V(3).Infof("Reading manifest file %q", filename)
glog.V(3).Infof("Reading config file %q", filename)
defer func() {
if err == nil && pod != nil {
objKey, keyErr := cache.MetaNamespaceKeyFunc(pod)
@ -193,7 +226,7 @@ func (s *sourceFile) extractFromFile(filename string) (pod *v1.Pod, err error) {
return pod, nil
}
return pod, fmt.Errorf("%v: couldn't parse as pod(%v), please check manifest file.\n", filename, podErr)
return pod, fmt.Errorf("%v: couldn't parse as pod(%v), please check config file.\n", filename, podErr)
}
func (s *sourceFile) replaceStore(pods ...*v1.Pod) (err error) {

View File

@ -24,23 +24,49 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/golang/glog"
"golang.org/x/exp/inotify"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/flowcontrol"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
type podEventType int
const (
podAdd podEventType = iota
podModify
podDelete
retryPeriod = 1 * time.Second
maxRetryPeriod = 20 * time.Second
)
func (s *sourceFile) watch() error {
type retryableError struct {
message string
}
func (e *retryableError) Error() string {
return e.message
}
func (s *sourceFile) startWatch() {
backOff := flowcontrol.NewBackOff(retryPeriod, maxRetryPeriod)
backOffId := "watch"
go wait.Forever(func() {
if backOff.IsInBackOffSinceUpdate(backOffId, time.Now()) {
return
}
if err := s.doWatch(); err != nil {
glog.Errorf("Unable to read config path %q: %v", s.path, err)
if _, retryable := err.(*retryableError); !retryable {
backOff.Next(backOffId, time.Now())
}
}
}, retryPeriod)
}
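A detail worth noting in startWatch above: a retryableError (the config path simply not existing yet) does not advance the backoff, so the watcher keeps re-checking every retryPeriod, while any other error grows the delay toward maxRetryPeriod. A standalone sketch of that policy using only the standard library (nextDelay is a hypothetical helper; the real code delegates to client-go's flowcontrol.Backoff):

package main

import (
	"errors"
	"fmt"
	"time"
)

const (
	retryPeriod    = 1 * time.Second
	maxRetryPeriod = 20 * time.Second
)

type retryableError struct{ message string }

func (e *retryableError) Error() string { return e.message }

// nextDelay mirrors the policy above: retryable errors keep polling at the
// base period, anything else doubles the delay up to maxRetryPeriod.
func nextDelay(err error, current time.Duration) time.Duration {
	var re *retryableError
	if errors.As(err, &re) {
		return retryPeriod
	}
	next := 2 * current
	if next > maxRetryPeriod {
		next = maxRetryPeriod
	}
	return next
}

func main() {
	fmt.Println(nextDelay(&retryableError{"path does not exist, ignoring"}, retryPeriod)) // 1s
	fmt.Println(nextDelay(fmt.Errorf("unable to create inotify watcher"), 8*time.Second)) // 16s
}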
func (s *sourceFile) doWatch() error {
_, err := os.Stat(s.path)
if err != nil {
if !os.IsNotExist(err) {
@ -48,7 +74,7 @@ func (s *sourceFile) watch() error {
}
// Emit an update with an empty PodList to allow FileSource to be marked as seen
s.updates <- kubetypes.PodUpdate{Pods: []*v1.Pod{}, Op: kubetypes.SET, Source: kubetypes.FileSource}
return fmt.Errorf("path does not exist, ignoring")
return &retryableError{"path does not exist, ignoring"}
}
w, err := inotify.NewWatcher()
@ -57,22 +83,16 @@ func (s *sourceFile) watch() error {
}
defer w.Close()
err = w.AddWatch(s.path, inotify.IN_DELETE_SELF|inotify.IN_CREATE|inotify.IN_MOVED_TO|inotify.IN_MODIFY|inotify.IN_MOVED_FROM|inotify.IN_DELETE)
err = w.AddWatch(s.path, inotify.IN_DELETE_SELF|inotify.IN_CREATE|inotify.IN_MOVED_TO|inotify.IN_MODIFY|inotify.IN_MOVED_FROM|inotify.IN_DELETE|inotify.IN_ATTRIB)
if err != nil {
return fmt.Errorf("unable to create inotify for path %q: %v", s.path, err)
}
// Reset store with manifest files already existing when starting
if err := s.resetStoreFromPath(); err != nil {
return fmt.Errorf("unable to read manifest path %q: %v", s.path, err)
}
for {
select {
case event := <-w.Event:
err = s.processEvent(event)
if err != nil {
return fmt.Errorf("error while processing event (%+v): %v", event, err)
if err = s.produceWatchEvent(event); err != nil {
return fmt.Errorf("error while processing inotify event (%+v): %v", event, err)
}
case err = <-w.Error:
return fmt.Errorf("error while watching %q: %v", s.path, err)
@ -80,7 +100,7 @@ func (s *sourceFile) watch() error {
}
}
func (s *sourceFile) processEvent(e *inotify.Event) error {
func (s *sourceFile) produceWatchEvent(e *inotify.Event) error {
// Ignore files that start with dots
if strings.HasPrefix(filepath.Base(e.Name), ".") {
glog.V(4).Infof("Ignored pod manifest: %s, because it starts with dots", e.Name)
@ -97,6 +117,8 @@ func (s *sourceFile) processEvent(e *inotify.Event) error {
eventType = podAdd
case (e.Mask & inotify.IN_MODIFY) > 0:
eventType = podModify
case (e.Mask & inotify.IN_ATTRIB) > 0:
eventType = podModify
case (e.Mask & inotify.IN_DELETE) > 0:
eventType = podDelete
case (e.Mask & inotify.IN_MOVED_FROM) > 0:
@ -108,22 +130,31 @@ func (s *sourceFile) processEvent(e *inotify.Event) error {
return nil
}
switch eventType {
s.watchEvents <- &watchEvent{e.Name, eventType}
return nil
}
func (s *sourceFile) consumeWatchEvent(e *watchEvent) error {
switch e.eventType {
case podAdd, podModify:
if pod, err := s.extractFromFile(e.Name); err != nil {
glog.Errorf("Can't process manifest file %q: %v", e.Name, err)
if pod, err := s.extractFromFile(e.fileName); err != nil {
return fmt.Errorf("can't process config file %q: %v", e.fileName, err)
} else {
return s.store.Add(pod)
}
case podDelete:
if objKey, keyExist := s.fileKeyMapping[e.Name]; keyExist {
if objKey, keyExist := s.fileKeyMapping[e.fileName]; keyExist {
pod, podExist, err := s.store.GetByKey(objKey)
if err != nil {
return err
} else if !podExist {
return fmt.Errorf("the pod with key %s doesn't exist in cache", objKey)
} else {
return s.store.Delete(pod)
if err = s.store.Delete(pod); err != nil {
return fmt.Errorf("failed to remove deleted pod from cache: %v", err)
} else {
delete(s.fileKeyMapping, e.fileName)
}
}
}
}

View File

@ -21,7 +21,6 @@ package config
import (
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
@ -35,7 +34,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/api/testapi"
api "k8s.io/kubernetes/pkg/apis/core"
k8s_api_v1 "k8s.io/kubernetes/pkg/apis/core/v1"
@ -46,8 +45,8 @@ import (
func TestExtractFromNonExistentFile(t *testing.T) {
ch := make(chan interface{}, 1)
c := newSourceFile("/some/fake/file", "localhost", ch)
err := c.watch()
lw := newSourceFile("/some/fake/file", "localhost", time.Millisecond, ch)
err := lw.doWatch()
if err == nil {
t.Errorf("Expected error")
}
@ -75,7 +74,7 @@ func TestReadPodsFromFileExistAlready(t *testing.T) {
for _, testCase := range testCases {
func() {
dirName, err := utiltesting.MkTmpdir("file-test")
dirName, err := mkTempDir("file-test")
if err != nil {
t.Fatalf("unable to create temp dir: %v", err)
}
@ -107,69 +106,35 @@ func TestReadPodsFromFileExistAlready(t *testing.T) {
}
}
func TestReadPodsFromFileExistLater(t *testing.T) {
watchFileAdded(false, t)
var (
testCases = []struct {
watchDir bool
symlink bool
}{
{true, true},
{true, false},
{false, true},
{false, false},
}
)
func TestWatchFileAdded(t *testing.T) {
for _, testCase := range testCases {
watchFileAdded(testCase.watchDir, testCase.symlink, t)
}
}
func TestReadPodsFromFileChanged(t *testing.T) {
watchFileChanged(false, t)
}
func TestReadPodsFromFileInDirAdded(t *testing.T) {
watchFileAdded(true, t)
}
func TestReadPodsFromFileInDirChanged(t *testing.T) {
watchFileChanged(true, t)
}
func TestExtractFromBadDataFile(t *testing.T) {
dirName, err := utiltesting.MkTmpdir("file-test")
if err != nil {
t.Fatalf("unable to create temp dir: %v", err)
}
defer os.RemoveAll(dirName)
fileName := filepath.Join(dirName, "test_pod_manifest")
err = ioutil.WriteFile(fileName, []byte{1, 2, 3}, 0555)
if err != nil {
t.Fatalf("unable to write test file %#v", err)
}
ch := make(chan interface{}, 1)
c := newSourceFile(fileName, "localhost", ch)
err = c.resetStoreFromPath()
if err == nil {
t.Fatalf("expected error, got nil")
}
expectEmptyChannel(t, ch)
}
func TestExtractFromEmptyDir(t *testing.T) {
dirName, err := utiltesting.MkTmpdir("file-test")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
defer os.RemoveAll(dirName)
ch := make(chan interface{}, 1)
c := newSourceFile(dirName, "localhost", ch)
err = c.resetStoreFromPath()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
update := (<-ch).(kubetypes.PodUpdate)
expected := CreatePodUpdate(kubetypes.SET, kubetypes.FileSource)
if !apiequality.Semantic.DeepEqual(expected, update) {
t.Fatalf("expected %#v, Got %#v", expected, update)
func TestWatchFileChanged(t *testing.T) {
for _, testCase := range testCases {
watchFileChanged(testCase.watchDir, testCase.symlink, t)
}
}
type testCase struct {
desc string
pod runtime.Object
expected kubetypes.PodUpdate
desc string
linkedFile string
pod runtime.Object
expected kubetypes.PodUpdate
}
func getTestCases(hostname types.NodeName) []*testCase {
@ -234,7 +199,7 @@ func getTestCases(hostname types.NodeName) []*testCase {
func (tc *testCase) writeToFile(dir, name string, t *testing.T) string {
var versionedPod runtime.Object
err := testapi.Default.Converter().Convert(&tc.pod, &versionedPod, nil)
err := legacyscheme.Scheme.Convert(&tc.pod, &versionedPod, nil)
if err != nil {
t.Fatalf("%s: error in versioning the pod: %v", tc.desc, err)
}
@ -250,19 +215,40 @@ func (tc *testCase) writeToFile(dir, name string, t *testing.T) string {
return fileName
}
func watchFileAdded(watchDir bool, t *testing.T) {
func createSymbolicLink(link, target, name string, t *testing.T) string {
linkName := filepath.Join(link, name)
linkedFile := filepath.Join(target, name)
err := os.Symlink(linkedFile, linkName)
if err != nil {
t.Fatalf("unexpected error when create symbolic link: %v", err)
}
return linkName
}
func watchFileAdded(watchDir bool, symlink bool, t *testing.T) {
hostname := types.NodeName("random-test-hostname")
var testCases = getTestCases(hostname)
fileNamePre := "test_pod_manifest"
for index, testCase := range testCases {
func() {
dirName, err := utiltesting.MkTmpdir("dir-test")
dirName, err := mkTempDir("dir-test")
if err != nil {
t.Fatalf("unable to create temp dir: %v", err)
}
defer os.RemoveAll(dirName)
defer removeAll(dirName, t)
fileName := fmt.Sprintf("%s_%d", fileNamePre, index)
var linkedDirName string
if symlink {
linkedDirName, err = mkTempDir("linked-dir-test")
if err != nil {
t.Fatalf("unable to create temp dir for linked files: %v", err)
}
defer removeAll(linkedDirName, t)
createSymbolicLink(dirName, linkedDirName, fileName, t)
}
ch := make(chan interface{})
if watchDir {
@ -274,12 +260,17 @@ func watchFileAdded(watchDir bool, t *testing.T) {
addFile := func() {
// Add a file
if symlink {
testCase.writeToFile(linkedDirName, fileName, t)
return
}
testCase.writeToFile(dirName, fileName, t)
}
go addFile()
// For !watchDir: expect an update by SourceFile.resetStoreFromPath().
// For !watchDir: expect an update by SourceFile.listConfig().
// For watchDir: expect at least one update from CREATE & MODIFY inotify event.
// Shouldn't expect two updates from CREATE & MODIFY because CREATE doesn't guarantee file written.
// In that case no update will be sent from CREATE event.
@ -288,19 +279,29 @@ func watchFileAdded(watchDir bool, t *testing.T) {
}
}
func watchFileChanged(watchDir bool, t *testing.T) {
func watchFileChanged(watchDir bool, symlink bool, t *testing.T) {
hostname := types.NodeName("random-test-hostname")
var testCases = getTestCases(hostname)
fileNamePre := "test_pod_manifest"
for index, testCase := range testCases {
func() {
dirName, err := utiltesting.MkTmpdir("dir-test")
dirName, err := mkTempDir("dir-test")
fileName := fmt.Sprintf("%s_%d", fileNamePre, index)
if err != nil {
t.Fatalf("unable to create temp dir: %v", err)
}
defer os.RemoveAll(dirName)
defer removeAll(dirName, t)
var linkedDirName string
if symlink {
linkedDirName, err = mkTempDir("linked-dir-test")
if err != nil {
t.Fatalf("unable to create temp dir for linked files: %v", err)
}
defer removeAll(linkedDirName, t)
createSymbolicLink(dirName, linkedDirName, fileName, t)
}
var file string
lock := &sync.Mutex{}
@ -308,6 +309,12 @@ func watchFileChanged(watchDir bool, t *testing.T) {
func() {
lock.Lock()
defer lock.Unlock()
if symlink {
file = testCase.writeToFile(linkedDirName, fileName, t)
return
}
file = testCase.writeToFile(dirName, fileName, t)
}()
@ -332,7 +339,12 @@ func watchFileChanged(watchDir bool, t *testing.T) {
pod.Spec.Containers[0].Name = "image2"
testCase.expected.Pods[0].Spec.Containers[0].Name = "image2"
testCase.writeToFile(dirName, fileName, t)
if symlink {
file = testCase.writeToFile(linkedDirName, fileName, t)
return
}
file = testCase.writeToFile(dirName, fileName, t)
}
go changeFile()
@ -370,6 +382,10 @@ func expectUpdate(t *testing.T, ch chan interface{}, testCase *testCase) {
select {
case got := <-ch:
update := got.(kubetypes.PodUpdate)
if len(update.Pods) == 0 {
// filter out empty updates caused by reading a non-existent path
continue
}
for _, pod := range update.Pods {
// TODO: remove the conversion when validation is performed on versioned objects.
internalPod := &api.Pod{}

View File

@ -0,0 +1,84 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
apiequality "k8s.io/apimachinery/pkg/api/equality"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func TestExtractFromBadDataFile(t *testing.T) {
dirName, err := mkTempDir("file-test")
if err != nil {
t.Fatalf("unable to create temp dir: %v", err)
}
defer removeAll(dirName, t)
fileName := filepath.Join(dirName, "test_pod_config")
err = ioutil.WriteFile(fileName, []byte{1, 2, 3}, 0555)
if err != nil {
t.Fatalf("unable to write test file %#v", err)
}
ch := make(chan interface{}, 1)
lw := newSourceFile(fileName, "localhost", time.Millisecond, ch)
err = lw.listConfig()
if err == nil {
t.Fatalf("expected error, got nil")
}
expectEmptyChannel(t, ch)
}
func TestExtractFromEmptyDir(t *testing.T) {
dirName, err := mkTempDir("file-test")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
defer removeAll(dirName, t)
ch := make(chan interface{}, 1)
lw := newSourceFile(dirName, "localhost", time.Millisecond, ch)
err = lw.listConfig()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
update, ok := (<-ch).(kubetypes.PodUpdate)
if !ok {
t.Fatalf("unexpected type: %#v", update)
}
expected := CreatePodUpdate(kubetypes.SET, kubetypes.FileSource)
if !apiequality.Semantic.DeepEqual(expected, update) {
t.Fatalf("expected %#v, got %#v", expected, update)
}
}
func mkTempDir(prefix string) (string, error) {
return ioutil.TempDir(os.TempDir(), prefix)
}
func removeAll(dir string, t *testing.T) {
if err := os.RemoveAll(dir); err != nil {
t.Fatalf("unable to remove dir %s: %v", dir, err)
}
}

View File

@ -19,8 +19,16 @@ limitations under the License.
// Reads the pod configuration from file or a directory of files.
package config
import "errors"
import (
"fmt"
func (s *sourceFile) watch() error {
return errors.New("source file is unsupported in this build")
"github.com/golang/glog"
)
func (s *sourceFile) startWatch() {
glog.Errorf("Watching source file is unsupported in this build")
}
func (s *sourceFile) consumeWatchEvent(e *watchEvent) error {
return fmt.Errorf("consuming watch event is unsupported in this build")
}

View File

@ -17,6 +17,8 @@ limitations under the License.
package config
import (
"fmt"
"github.com/spf13/pflag"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -29,6 +31,15 @@ type ContainerRuntimeOptions struct {
ContainerRuntime string
// RuntimeCgroups that container runtime is expected to be isolated in.
RuntimeCgroups string
// RedirectContainerStreaming enables container streaming redirect.
// When RedirectContainerStreaming is false, kubelet will proxy container streaming data
// between apiserver and container runtime. This approach is more secure, but the proxy
// introduces some overhead.
// When RedirectContainerStreaming is true, kubelet will return an http redirect to apiserver,
// and apiserver will access container runtime directly. This approach is more performant,
// but less secure because the connection between apiserver and container runtime is not
// authenticated.
RedirectContainerStreaming bool
// Docker-specific options.
@ -67,45 +78,30 @@ type ContainerRuntimeOptions struct {
// CNIBinDir is the full path of the directory in which to search for
// CNI plugin binaries
CNIBinDir string
// rkt-specific options.
// rktPath is the path of rkt binary. Leave empty to use the first rkt in $PATH.
RktPath string
// rktApiEndpoint is the endpoint of the rkt API service to communicate with.
RktAPIEndpoint string
// rktStage1Image is the image to use as stage1. Local paths and
// http/https URLs are supported.
RktStage1Image string
}
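To make the RedirectContainerStreaming trade-off concrete, here is a hypothetical handler sketch (wiring and names invented, not the kubelet's actual server code):

	package main

	import (
		"net/http"
		"net/http/httputil"
		"net/url"
	)

	func serveStream(w http.ResponseWriter, req *http.Request, runtimeURL *url.URL, redirect bool) {
		if redirect {
			// Redirect mode: answer 302 and let the apiserver contact the
			// runtime's streaming server directly (faster, but that second
			// hop is not authenticated).
			http.Redirect(w, req, runtimeURL.String(), http.StatusFound)
			return
		}
		// Proxy mode: the kubelet stays in the data path and relays bytes
		// between apiserver and runtime (more secure, some overhead).
		httputil.NewSingleHostReverseProxy(runtimeURL).ServeHTTP(w, req)
	}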
func (s *ContainerRuntimeOptions) AddFlags(fs *pflag.FlagSet) {
dockerOnlyWarning := "This docker-specific flag only works when container-runtime is set to docker."
// General settings.
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'remote', 'rkt (deprecated)'.")
fs.StringVar(&s.RuntimeCgroups, "runtime-cgroups", s.RuntimeCgroups, "Optional absolute name of cgroups to create and run the runtime in.")
fs.BoolVar(&s.RedirectContainerStreaming, "redirect-container-streaming", s.RedirectContainerStreaming, "Enables container streaming redirect. If false, kubelet will proxy container streaming data between apiserver and container runtime; if true, kubelet will return an http redirect to apiserver, and apiserver will access container runtime directly. The proxy approach is more secure, but introduces some overhead. The redirect approach is more performant, but less secure because the connection between apiserver and container runtime may not be authenticated.")
// Docker-specific settings.
fs.BoolVar(&s.ExperimentalDockershim, "experimental-dockershim", s.ExperimentalDockershim, "Enable dockershim only mode. In this mode, kubelet will only start dockershim without any other functionalities. This flag only serves test purpose, please do not use it unless you are conscious of what you are doing. [default=false]")
fs.MarkHidden("experimental-dockershim")
fs.StringVar(&s.DockershimRootDirectory, "experimental-dockershim-root-directory", s.DockershimRootDirectory, "Path to the dockershim root directory.")
fs.MarkHidden("experimental-dockershim-root-directory")
fs.BoolVar(&s.DockerDisableSharedPID, "docker-disable-shared-pid", s.DockerDisableSharedPID, "Setting this to false causes Kubernetes to create pods using a shared process namespace for containers in a pod when running with Docker 1.13.1 or higher. A future Kubernetes release will make this configurable instead in the API.")
fs.BoolVar(&s.DockerDisableSharedPID, "docker-disable-shared-pid", s.DockerDisableSharedPID, fmt.Sprintf("Setting this to false causes Kubernetes to create pods using a shared process namespace for containers in a pod when running with Docker 1.13.1 or higher. A future Kubernetes release will make this configurable instead in the API. %s", dockerOnlyWarning))
fs.MarkDeprecated("docker-disable-shared-pid", "will be removed in a future release. This option will be replaced by PID namespace sharing that is configurable per-pod using the API. See https://features.k8s.io/495")
fs.StringVar(&s.PodSandboxImage, "pod-infra-container-image", s.PodSandboxImage, "The image whose network/ipc namespaces containers in each pod will use.")
fs.StringVar(&s.DockerEndpoint, "docker-endpoint", s.DockerEndpoint, "Use this for the docker endpoint to communicate with")
fs.DurationVar(&s.ImagePullProgressDeadline.Duration, "image-pull-progress-deadline", s.ImagePullProgressDeadline.Duration, "If no pulling progress is made before this deadline, the image pulling will be cancelled.")
fs.StringVar(&s.PodSandboxImage, "pod-infra-container-image", s.PodSandboxImage, fmt.Sprintf("The image whose network/ipc namespaces containers in each pod will use. %s", dockerOnlyWarning))
fs.StringVar(&s.DockerEndpoint, "docker-endpoint", s.DockerEndpoint, fmt.Sprintf("Use this for the docker endpoint to communicate with %s", dockerOnlyWarning))
fs.DurationVar(&s.ImagePullProgressDeadline.Duration, "image-pull-progress-deadline", s.ImagePullProgressDeadline.Duration, fmt.Sprintf("If no pulling progress is made before this deadline, the image pulling will be cancelled. %s", dockerOnlyWarning))
// Network plugin settings. Shared by both docker and rkt.
fs.StringVar(&s.NetworkPluginName, "network-plugin", s.NetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
fs.StringVar(&s.CNIConfDir, "cni-conf-dir", s.CNIConfDir, "<Warning: Alpha feature> The full path of the directory in which to search for CNI config files. Default: /etc/cni/net.d")
fs.StringVar(&s.CNIBinDir, "cni-bin-dir", s.CNIBinDir, "<Warning: Alpha feature> The full path of the directory in which to search for CNI plugin binaries. Default: /opt/cni/bin")
fs.Int32Var(&s.NetworkPluginMTU, "network-plugin-mtu", s.NetworkPluginMTU, "<Warning: Alpha feature> The MTU to be passed to the network plugin, to override the default. Set to 0 to use the default 1460 MTU.")
// Rkt-specific settings.
fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'.")
fs.MarkDeprecated("rkt-path", "will be removed in a future version. Rktnetes has been deprecated in favor of rktlet (https://github.com/kubernetes-incubator/rktlet).")
fs.StringVar(&s.RktAPIEndpoint, "rkt-api-endpoint", s.RktAPIEndpoint, "The endpoint of the rkt API service to communicate with. Only used if --container-runtime='rkt'.")
fs.MarkDeprecated("rkt-api-endpoint", "will be removed in a future version. Rktnetes has been deprecated in favor of rktlet (https://github.com/kubernetes-incubator/rktlet).")
fs.StringVar(&s.RktStage1Image, "rkt-stage1-image", s.RktStage1Image, "image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used.")
fs.MarkDeprecated("rkt-stage1-image", "will be removed in a future version. Rktnetes has been deprecated in favor of rktlet (https://github.com/kubernetes-incubator/rktlet).")
// Network plugin settings for Docker.
fs.StringVar(&s.NetworkPluginName, "network-plugin", s.NetworkPluginName, fmt.Sprintf("<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle. %s", dockerOnlyWarning))
fs.StringVar(&s.CNIConfDir, "cni-conf-dir", s.CNIConfDir, fmt.Sprintf("<Warning: Alpha feature> The full path of the directory in which to search for CNI config files. Default: /etc/cni/net.d. %s", dockerOnlyWarning))
fs.StringVar(&s.CNIBinDir, "cni-bin-dir", s.CNIBinDir, fmt.Sprintf("<Warning: Alpha feature> A comma-separated list of full paths of directories in which to search for CNI plugin binaries. Default: /opt/cni/bin. %s", dockerOnlyWarning))
fs.Int32Var(&s.NetworkPluginMTU, "network-plugin-mtu", s.NetworkPluginMTU, fmt.Sprintf("<Warning: Alpha feature> The MTU to be passed to the network plugin, to override the default. Set to 0 to use the default 1460 MTU. %s", dockerOnlyWarning))
}

View File

@ -72,7 +72,7 @@ func TestExtractInvalidPods(t *testing.T) {
{
desc: "Invalid volume name",
pod: &v1.Pod{
TypeMeta: metav1.TypeMeta{APIVersion: legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion.String()},
TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
Spec: v1.PodSpec{
Volumes: []v1.Volume{{Name: "_INVALID_"}},
},
@ -81,7 +81,7 @@ func TestExtractInvalidPods(t *testing.T) {
{
desc: "Duplicate volume names",
pod: &v1.Pod{
TypeMeta: metav1.TypeMeta{APIVersion: legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion.String()},
TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
Spec: v1.PodSpec{
Volumes: []v1.Volume{{Name: "repeated"}, {Name: "repeated"}},
},
@ -90,7 +90,7 @@ func TestExtractInvalidPods(t *testing.T) {
{
desc: "Unspecified container name",
pod: &v1.Pod{
TypeMeta: metav1.TypeMeta{APIVersion: legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion.String()},
TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: ""}},
},
@ -99,7 +99,7 @@ func TestExtractInvalidPods(t *testing.T) {
{
desc: "Invalid container name",
pod: &v1.Pod{
TypeMeta: metav1.TypeMeta{APIVersion: legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion.String()},
TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "_INVALID_"}},
},
@ -290,7 +290,7 @@ func TestExtractPodsFromHTTP(t *testing.T) {
for _, testCase := range testCases {
var versionedPods runtime.Object
err := testapi.Default.Converter().Convert(&testCase.pods, &versionedPods, nil)
err := legacyscheme.Scheme.Convert(&testCase.pods, &versionedPods, nil)
if err != nil {
t.Fatalf("%s: error in versioning the pods: %s", testCase.desc, err)
}
@ -331,7 +331,7 @@ func TestExtractPodsFromHTTP(t *testing.T) {
func TestURLWithHeader(t *testing.T) {
pod := &v1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion.String(),
APIVersion: "v1",
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{

View File

@ -15,13 +15,12 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/kubelet/configmap",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/kubelet/util:go_default_library",
"//pkg/kubelet/util/manager:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/storage/etcd:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
],
)
@ -44,13 +43,12 @@ go_test(
srcs = ["configmap_manager_test.go"],
embed = [":go_default_library"],
deps = [
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//pkg/kubelet/util/manager:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
"//vendor/k8s.io/client-go/testing:go_default_library",
],
)

View File

@ -18,26 +18,19 @@ package configmap
import (
"fmt"
"strconv"
"sync"
"time"
"k8s.io/api/core/v1"
storageetcd "k8s.io/apiserver/pkg/storage/etcd"
clientset "k8s.io/client-go/kubernetes"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/kubelet/util/manager"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/sets"
)
const (
defaultTTL = time.Minute
)
type Manager interface {
// Get configmap by configmap namespace and name.
GetConfigMap(namespace, name string) (*v1.ConfigMap, error)
@ -73,191 +66,31 @@ func (s *simpleConfigMapManager) RegisterPod(pod *v1.Pod) {
func (s *simpleConfigMapManager) UnregisterPod(pod *v1.Pod) {
}
type GetObjectTTLFunc func() (time.Duration, bool)
type objectKey struct {
namespace string
name string
// configMapManager keeps a cache of all configmaps necessary
// for registered pods. Different implementations of the store
// may result in different semantics for freshness of configmaps
// (e.g. ttl-based implementation vs watch-based implementation).
type configMapManager struct {
manager manager.Manager
}
// configMapStoreItems is a single item stored in configMapStore.
type configMapStoreItem struct {
refCount int
configMap *configMapData
}
type configMapData struct {
sync.Mutex
configMap *v1.ConfigMap
err error
lastUpdateTime time.Time
}
// configMapStore is a local cache of configmaps.
type configMapStore struct {
kubeClient clientset.Interface
clock clock.Clock
lock sync.Mutex
items map[objectKey]*configMapStoreItem
defaultTTL time.Duration
getTTL GetObjectTTLFunc
}
func newConfigMapStore(kubeClient clientset.Interface, clock clock.Clock, getTTL GetObjectTTLFunc, ttl time.Duration) *configMapStore {
return &configMapStore{
kubeClient: kubeClient,
clock: clock,
items: make(map[objectKey]*configMapStoreItem),
defaultTTL: ttl,
getTTL: getTTL,
}
}
func isConfigMapOlder(newConfigMap, oldConfigMap *v1.ConfigMap) bool {
if newConfigMap == nil || oldConfigMap == nil {
return false
}
newVersion, _ := storageetcd.Versioner.ObjectResourceVersion(newConfigMap)
oldVersion, _ := storageetcd.Versioner.ObjectResourceVersion(oldConfigMap)
return newVersion < oldVersion
}
func (c *configMapManager) GetConfigMap(namespace, name string) (*v1.ConfigMap, error) {
object, err := c.manager.GetObject(namespace, name)
if err != nil {
return nil, err
}
if configmap, ok := object.(*v1.ConfigMap); ok {
return configmap, nil
}
return nil, fmt.Errorf("unexpected object type: %v", object)
}
func (s *configMapStore) Add(namespace, name string) {
key := objectKey{namespace: namespace, name: name}
// Add is called from RegisterPod, thus it needs to be efficient.
// Thus Add() is only increasing refCount and generation of a given configmap.
// Then Get() is responsible for fetching if needed.
s.lock.Lock()
defer s.lock.Unlock()
item, exists := s.items[key]
if !exists {
item = &configMapStoreItem{
refCount: 0,
configMap: &configMapData{},
}
s.items[key] = item
}
item.refCount++
// This will trigger fetch on the next Get() operation.
item.configMap = nil
func (c *configMapManager) RegisterPod(pod *v1.Pod) {
c.manager.RegisterPod(pod)
}
func (s *configMapStore) Delete(namespace, name string) {
key := objectKey{namespace: namespace, name: name}
s.lock.Lock()
defer s.lock.Unlock()
if item, ok := s.items[key]; ok {
item.refCount--
if item.refCount == 0 {
delete(s.items, key)
}
}
}
func GetObjectTTLFromNodeFunc(getNode func() (*v1.Node, error)) GetObjectTTLFunc {
return func() (time.Duration, bool) {
node, err := getNode()
if err != nil {
return time.Duration(0), false
}
if node != nil && node.Annotations != nil {
if value, ok := node.Annotations[v1.ObjectTTLAnnotationKey]; ok {
if intValue, err := strconv.Atoi(value); err == nil {
return time.Duration(intValue) * time.Second, true
}
}
}
return time.Duration(0), false
}
}
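A worked example of the annotation-driven TTL above, consistent with the TestParseNodeAnnotation cases further down (node object invented):

	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:        "node",
		Annotations: map[string]string{v1.ObjectTTLAnnotationKey: "60"},
	}}
	ttl, ok := GetObjectTTLFromNodeFunc(func() (*v1.Node, error) { return node, nil })()
	// ok == true, ttl == time.Minute: the annotation value is parsed as seconds.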
func (s *configMapStore) isConfigMapFresh(data *configMapData) bool {
configMapTTL := s.defaultTTL
if ttl, ok := s.getTTL(); ok {
configMapTTL = ttl
}
return s.clock.Now().Before(data.lastUpdateTime.Add(configMapTTL))
}
func (s *configMapStore) Get(namespace, name string) (*v1.ConfigMap, error) {
key := objectKey{namespace: namespace, name: name}
data := func() *configMapData {
s.lock.Lock()
defer s.lock.Unlock()
item, exists := s.items[key]
if !exists {
return nil
}
if item.configMap == nil {
item.configMap = &configMapData{}
}
return item.configMap
}()
if data == nil {
return nil, fmt.Errorf("configmap %q/%q not registered", namespace, name)
}
// After updating data in configMapStore, lock the data, fetch configMap if
// needed and return data.
data.Lock()
defer data.Unlock()
if data.err != nil || !s.isConfigMapFresh(data) {
opts := metav1.GetOptions{}
if data.configMap != nil && data.err == nil {
// This is just a periodic refresh of a configmap we successfully fetched previously.
// In this case, serve data from the apiserver cache to reduce the load on both
// etcd and apiserver (the cache is eventually consistent).
util.FromApiserverCache(&opts)
}
configMap, err := s.kubeClient.CoreV1().ConfigMaps(namespace).Get(name, opts)
if err != nil && !apierrors.IsNotFound(err) && data.configMap == nil && data.err == nil {
// Couldn't fetch the latest configmap, but there is no cached data to return.
// Return the fetch result instead.
return configMap, err
}
if (err == nil && !isConfigMapOlder(configMap, data.configMap)) || apierrors.IsNotFound(err) {
// If the fetch succeeded with a newer version of the configmap, or if the
// configmap could not be found in the apiserver, update the cached data to
// reflect the current status.
data.configMap = configMap
data.err = err
data.lastUpdateTime = s.clock.Now()
}
}
return data.configMap, data.err
}
// cachingConfigMapManager keeps a cache of all configmaps necessary for registered pods.
// It implements the following logic:
// - whenever a pod is created or updated, the cached versions of all its configmaps
// are invalidated
// - every GetConfigMap() call tries to fetch the value from local cache; if it is
// not there, invalidated or too old, we fetch it from apiserver and refresh the
// value in cache; otherwise it is just fetched from cache
type cachingConfigMapManager struct {
configMapStore *configMapStore
lock sync.Mutex
registeredPods map[objectKey]*v1.Pod
}
func NewCachingConfigMapManager(kubeClient clientset.Interface, getTTL GetObjectTTLFunc) Manager {
csm := &cachingConfigMapManager{
configMapStore: newConfigMapStore(kubeClient, clock.RealClock{}, getTTL, defaultTTL),
registeredPods: make(map[objectKey]*v1.Pod),
}
return csm
}
func (c *cachingConfigMapManager) GetConfigMap(namespace, name string) (*v1.ConfigMap, error) {
return c.configMapStore.Get(namespace, name)
func (c *configMapManager) UnregisterPod(pod *v1.Pod) {
c.manager.UnregisterPod(pod)
}
func getConfigMapNames(pod *v1.Pod) sets.String {
@ -269,39 +102,24 @@ func getConfigMapNames(pod *v1.Pod) sets.String {
return result
}
func (c *cachingConfigMapManager) RegisterPod(pod *v1.Pod) {
names := getConfigMapNames(pod)
c.lock.Lock()
defer c.lock.Unlock()
for name := range names {
c.configMapStore.Add(pod.Namespace, name)
}
var prev *v1.Pod
key := objectKey{namespace: pod.Namespace, name: pod.Name}
prev = c.registeredPods[key]
c.registeredPods[key] = pod
if prev != nil {
for name := range getConfigMapNames(prev) {
// On an update, the .Add() call above will have re-incremented the
// ref count of any existing items, so any configmaps that are in both
// names and prev need to have their ref counts decremented. Any that
// are only in prev need to be completely removed. This unconditional
// call takes care of both cases.
c.configMapStore.Delete(prev.Namespace, name)
}
}
}
const (
defaultTTL = time.Minute
)
func (c *cachingConfigMapManager) UnregisterPod(pod *v1.Pod) {
var prev *v1.Pod
key := objectKey{namespace: pod.Namespace, name: pod.Name}
c.lock.Lock()
defer c.lock.Unlock()
prev = c.registeredPods[key]
delete(c.registeredPods, key)
if prev != nil {
for name := range getConfigMapNames(prev) {
c.configMapStore.Delete(prev.Namespace, name)
}
// NewCachingConfigMapManager creates a manager that keeps a cache of all configmaps
// necessary for registered pods.
// It implements the following logic:
// - whenever a pod is created or updated, the cached versions of all configmaps
// are invalidated
// - every GetObject() call tries to fetch the value from local cache; if it is
// not there, invalidated or too old, we fetch it from apiserver and refresh the
// value in cache; otherwise it is just fetched from cache
func NewCachingConfigMapManager(kubeClient clientset.Interface, getTTL manager.GetObjectTTLFunc) Manager {
getConfigMap := func(namespace, name string, opts metav1.GetOptions) (runtime.Object, error) {
return kubeClient.CoreV1().ConfigMaps(namespace).Get(name, opts)
}
configMapStore := manager.NewObjectStore(getConfigMap, clock.RealClock{}, getTTL, defaultTTL)
return &configMapManager{
manager: manager.NewCacheBasedManager(configMapStore, getConfigMapNames),
}
}
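A sketch of how a caller might wire the rewritten manager, assuming GetObjectTTLFromNodeFunc now lives in the shared manager package (caller-side names invented):

	cm := NewCachingConfigMapManager(kubeClient, manager.GetObjectTTLFromNodeFunc(getNode))
	cm.RegisterPod(pod)                    // invalidates cached entries for the pod's configmaps
	c, err := cm.GetConfigMap(pod.Namespace, "app-config") // cache hit unless stale or invalidated
	if err == nil {
		_ = c.Data // use the configmap
	}
	cm.UnregisterPod(pod)                  // drops ref counts; unreferenced entries are evicted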

View File

@ -18,30 +18,27 @@ package configmap
import (
"fmt"
"reflect"
"strings"
"sync"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/fake"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
core "k8s.io/client-go/testing"
"github.com/stretchr/testify/assert"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/kubelet/util/manager"
)
func checkConfigMap(t *testing.T, store *configMapStore, ns, name string, shouldExist bool) {
func checkObject(t *testing.T, store manager.Store, ns, name string, shouldExist bool) {
_, err := store.Get(ns, name)
if shouldExist && err != nil {
t.Errorf("unexpected actions: %#v", err)
}
if !shouldExist && (err == nil || !strings.Contains(err.Error(), fmt.Sprintf("configmap %q/%q not registered", ns, name))) {
if !shouldExist && (err == nil || !strings.Contains(err.Error(), fmt.Sprintf("object %q/%q not registered", ns, name))) {
t.Errorf("unexpected actions: %#v", err)
}
}
@ -50,242 +47,9 @@ func noObjectTTL() (time.Duration, bool) {
return time.Duration(0), false
}
func TestConfigMapStore(t *testing.T) {
fakeClient := &fake.Clientset{}
store := newConfigMapStore(fakeClient, clock.RealClock{}, noObjectTTL, 0)
store.Add("ns1", "name1")
store.Add("ns2", "name2")
store.Add("ns1", "name1")
store.Add("ns1", "name1")
store.Delete("ns1", "name1")
store.Delete("ns2", "name2")
store.Add("ns3", "name3")
// Adds don't issue Get requests.
actions := fakeClient.Actions()
assert.Equal(t, 0, len(actions), "unexpected actions: %#v", actions)
// Should issue Get request
store.Get("ns1", "name1")
// Shouldn't issue Get request, as configMap is not registered
store.Get("ns2", "name2")
// Should issue Get request
store.Get("ns3", "name3")
actions = fakeClient.Actions()
assert.Equal(t, 2, len(actions), "unexpected actions: %#v", actions)
for _, a := range actions {
assert.True(t, a.Matches("get", "configmaps"), "unexpected actions: %#v", a)
}
checkConfigMap(t, store, "ns1", "name1", true)
checkConfigMap(t, store, "ns2", "name2", false)
checkConfigMap(t, store, "ns3", "name3", true)
checkConfigMap(t, store, "ns4", "name4", false)
}
func TestConfigMapStoreDeletingConfigMap(t *testing.T) {
fakeClient := &fake.Clientset{}
store := newConfigMapStore(fakeClient, clock.RealClock{}, noObjectTTL, 0)
store.Add("ns", "name")
result := &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "name", ResourceVersion: "10"}}
fakeClient.AddReactor("get", "configmaps", func(action core.Action) (bool, runtime.Object, error) {
return true, result, nil
})
configMap, err := store.Get("ns", "name")
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if !reflect.DeepEqual(configMap, result) {
t.Errorf("Unexpected configMap: %v", configMap)
}
fakeClient.PrependReactor("get", "configmaps", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.ConfigMap{}, apierrors.NewNotFound(v1.Resource("configMap"), "name")
})
configMap, err = store.Get("ns", "name")
if err == nil || !apierrors.IsNotFound(err) {
t.Errorf("Unexpected error: %v", err)
}
if !reflect.DeepEqual(configMap, &v1.ConfigMap{}) {
t.Errorf("Unexpected configMap: %v", configMap)
}
}
func TestConfigMapStoreGetAlwaysRefresh(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClock := clock.NewFakeClock(time.Now())
store := newConfigMapStore(fakeClient, fakeClock, noObjectTTL, 0)
for i := 0; i < 10; i++ {
store.Add(fmt.Sprintf("ns-%d", i), fmt.Sprintf("name-%d", i))
}
fakeClient.ClearActions()
wg := sync.WaitGroup{}
wg.Add(100)
for i := 0; i < 100; i++ {
go func(i int) {
store.Get(fmt.Sprintf("ns-%d", i%10), fmt.Sprintf("name-%d", i%10))
wg.Done()
}(i)
}
wg.Wait()
actions := fakeClient.Actions()
assert.Equal(t, 100, len(actions), "unexpected actions: %#v", actions)
for _, a := range actions {
assert.True(t, a.Matches("get", "configmaps"), "unexpected actions: %#v", a)
}
}
func TestConfigMapStoreGetNeverRefresh(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClock := clock.NewFakeClock(time.Now())
store := newConfigMapStore(fakeClient, fakeClock, noObjectTTL, time.Minute)
for i := 0; i < 10; i++ {
store.Add(fmt.Sprintf("ns-%d", i), fmt.Sprintf("name-%d", i))
}
fakeClient.ClearActions()
wg := sync.WaitGroup{}
wg.Add(100)
for i := 0; i < 100; i++ {
go func(i int) {
store.Get(fmt.Sprintf("ns-%d", i%10), fmt.Sprintf("name-%d", i%10))
wg.Done()
}(i)
}
wg.Wait()
actions := fakeClient.Actions()
// Only first Get, should forward the Get request.
assert.Equal(t, 10, len(actions), "unexpected actions: %#v", actions)
}
func TestCustomTTL(t *testing.T) {
ttl := time.Duration(0)
ttlExists := false
customTTL := func() (time.Duration, bool) {
return ttl, ttlExists
}
fakeClient := &fake.Clientset{}
fakeClock := clock.NewFakeClock(time.Time{})
store := newConfigMapStore(fakeClient, fakeClock, customTTL, time.Minute)
store.Add("ns", "name")
store.Get("ns", "name")
fakeClient.ClearActions()
// Set 0-ttl and see if that works.
ttl = time.Duration(0)
ttlExists = true
store.Get("ns", "name")
actions := fakeClient.Actions()
assert.Equal(t, 1, len(actions), "unexpected actions: %#v", actions)
fakeClient.ClearActions()
// Set 5-minute ttl and see if this works.
ttl = time.Duration(5) * time.Minute
store.Get("ns", "name")
actions = fakeClient.Actions()
assert.Equal(t, 0, len(actions), "unexpected actions: %#v", actions)
// Still no effect after 4 minutes.
fakeClock.Step(4 * time.Minute)
store.Get("ns", "name")
actions = fakeClient.Actions()
assert.Equal(t, 0, len(actions), "unexpected actions: %#v", actions)
// Now it should have an effect.
fakeClock.Step(time.Minute)
store.Get("ns", "name")
actions = fakeClient.Actions()
assert.Equal(t, 1, len(actions), "unexpected actions: %#v", actions)
fakeClient.ClearActions()
// Now remove the custom ttl and see if that works.
ttlExists = false
fakeClock.Step(55 * time.Second)
store.Get("ns", "name")
actions = fakeClient.Actions()
assert.Equal(t, 0, len(actions), "unexpected actions: %#v", actions)
// Pass the minute and it should be triggered now.
fakeClock.Step(5 * time.Second)
store.Get("ns", "name")
actions = fakeClient.Actions()
assert.Equal(t, 1, len(actions), "unexpected actions: %#v", actions)
}
func TestParseNodeAnnotation(t *testing.T) {
testCases := []struct {
node *v1.Node
err error
exists bool
ttl time.Duration
}{
{
node: nil,
err: fmt.Errorf("error"),
exists: false,
},
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
},
exists: false,
},
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
Annotations: map[string]string{},
},
},
exists: false,
},
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
Annotations: map[string]string{v1.ObjectTTLAnnotationKey: "bad"},
},
},
exists: false,
},
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
Annotations: map[string]string{v1.ObjectTTLAnnotationKey: "0"},
},
},
exists: true,
ttl: time.Duration(0),
},
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
Annotations: map[string]string{v1.ObjectTTLAnnotationKey: "60"},
},
},
exists: true,
ttl: time.Minute,
},
}
for i, testCase := range testCases {
getNode := func() (*v1.Node, error) { return testCase.node, testCase.err }
ttl, exists := GetObjectTTLFromNodeFunc(getNode)()
if exists != testCase.exists {
t.Errorf("%d: incorrect parsing: %t", i, exists)
continue
}
if exists && ttl != testCase.ttl {
t.Errorf("%d: incorrect ttl: %v", i, ttl)
}
func getConfigMap(fakeClient clientset.Interface) manager.GetObjectFunc {
return func(namespace, name string, opts metav1.GetOptions) (runtime.Object, error) {
return fakeClient.CoreV1().ConfigMaps(namespace).Get(name, opts)
}
}
@ -348,152 +112,11 @@ func podWithConfigMaps(ns, podName string, toAttach configMapsToAttach) *v1.Pod
return pod
}
func TestCacheInvalidation(t *testing.T) {
func TestCacheBasedConfigMapManager(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClock := clock.NewFakeClock(time.Now())
store := newConfigMapStore(fakeClient, fakeClock, noObjectTTL, time.Minute)
manager := &cachingConfigMapManager{
configMapStore: store,
registeredPods: make(map[objectKey]*v1.Pod),
}
// Create a pod with some configMaps.
s1 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s1"}, envFromNames: []string{"s10"}},
{envVarNames: []string{"s2"}},
},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name1", s1))
// Fetch the configMaps - this should trigger get operations.
store.Get("ns1", "s1")
store.Get("ns1", "s10")
store.Get("ns1", "s2")
actions := fakeClient.Actions()
assert.Equal(t, 3, len(actions), "unexpected actions: %#v", actions)
fakeClient.ClearActions()
// Update a pod with a new configMap.
s2 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s1"}},
{envVarNames: []string{"s2"}, envFromNames: []string{"s20"}},
},
volumes: []string{"s3"},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name1", s2))
// All configMaps should be invalidated - this should trigger get operations.
store.Get("ns1", "s1")
store.Get("ns1", "s2")
store.Get("ns1", "s20")
store.Get("ns1", "s3")
actions = fakeClient.Actions()
assert.Equal(t, 4, len(actions), "unexpected actions: %#v", actions)
fakeClient.ClearActions()
// Create a new pod that is referencing the first three configMaps - those should
// be invalidated.
manager.RegisterPod(podWithConfigMaps("ns1", "name2", s1))
store.Get("ns1", "s1")
store.Get("ns1", "s10")
store.Get("ns1", "s2")
store.Get("ns1", "s20")
store.Get("ns1", "s3")
actions = fakeClient.Actions()
assert.Equal(t, 3, len(actions), "unexpected actions: %#v", actions)
fakeClient.ClearActions()
}
func TestCacheRefcounts(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClock := clock.NewFakeClock(time.Now())
store := newConfigMapStore(fakeClient, fakeClock, noObjectTTL, time.Minute)
manager := &cachingConfigMapManager{
configMapStore: store,
registeredPods: make(map[objectKey]*v1.Pod),
}
s1 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s1"}, envFromNames: []string{"s10"}},
{envVarNames: []string{"s2"}},
},
volumes: []string{"s3"},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name1", s1))
manager.RegisterPod(podWithConfigMaps("ns1", "name2", s1))
s2 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s4"}},
{envVarNames: []string{"s5"}, envFromNames: []string{"s50"}},
},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name2", s2))
manager.RegisterPod(podWithConfigMaps("ns1", "name3", s2))
manager.RegisterPod(podWithConfigMaps("ns1", "name4", s2))
manager.UnregisterPod(podWithConfigMaps("ns1", "name3", s2))
s3 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s3"}, envFromNames: []string{"s30"}},
{envVarNames: []string{"s5"}},
},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name5", s3))
manager.RegisterPod(podWithConfigMaps("ns1", "name6", s3))
s4 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s6"}},
{envFromNames: []string{"s60"}},
},
}
manager.RegisterPod(podWithConfigMaps("ns1", "name7", s4))
manager.UnregisterPod(podWithConfigMaps("ns1", "name7", s4))
// Also check the Add + Update + Remove scenario.
manager.RegisterPod(podWithConfigMaps("ns1", "other-name", s1))
manager.RegisterPod(podWithConfigMaps("ns1", "other-name", s2))
manager.UnregisterPod(podWithConfigMaps("ns1", "other-name", s2))
s5 := configMapsToAttach{
containerEnvConfigMaps: []envConfigMaps{
{envVarNames: []string{"s7"}},
{envFromNames: []string{"s70"}},
},
}
// Check the no-op update scenario
manager.RegisterPod(podWithConfigMaps("ns1", "noop-pod", s5))
manager.RegisterPod(podWithConfigMaps("ns1", "noop-pod", s5))
refs := func(ns, name string) int {
store.lock.Lock()
defer store.lock.Unlock()
item, ok := store.items[objectKey{ns, name}]
if !ok {
return 0
}
return item.refCount
}
assert.Equal(t, 1, refs("ns1", "s1"))
assert.Equal(t, 1, refs("ns1", "s10"))
assert.Equal(t, 1, refs("ns1", "s2"))
assert.Equal(t, 3, refs("ns1", "s3"))
assert.Equal(t, 2, refs("ns1", "s30"))
assert.Equal(t, 2, refs("ns1", "s4"))
assert.Equal(t, 4, refs("ns1", "s5"))
assert.Equal(t, 2, refs("ns1", "s50"))
assert.Equal(t, 0, refs("ns1", "s6"))
assert.Equal(t, 0, refs("ns1", "s60"))
assert.Equal(t, 1, refs("ns1", "s7"))
assert.Equal(t, 1, refs("ns1", "s70"))
}
func TestCachingConfigMapManager(t *testing.T) {
fakeClient := &fake.Clientset{}
configMapStore := newConfigMapStore(fakeClient, clock.RealClock{}, noObjectTTL, 0)
manager := &cachingConfigMapManager{
configMapStore: configMapStore,
registeredPods: make(map[objectKey]*v1.Pod),
store := manager.NewObjectStore(getConfigMap(fakeClient), clock.RealClock{}, noObjectTTL, 0)
manager := &configMapManager{
manager: manager.NewCacheBasedManager(store, getConfigMapNames),
}
// Create a pod with some configMaps.
@ -543,7 +166,7 @@ func TestCachingConfigMapManager(t *testing.T) {
for _, ns := range []string{"ns1", "ns2", "ns3"} {
for _, configMap := range []string{"s1", "s2", "s3", "s4", "s5", "s6", "s20", "s40", "s50"} {
checkConfigMap(t, configMapStore, ns, configMap, shouldExist(ns, configMap))
checkObject(t, store, ns, configMap, shouldExist(ns, configMap))
}
}
}

View File

@ -14,49 +14,13 @@ go_library(
"runtime_cache.go",
"runtime_cache_fake.go",
"sync_result.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"pty_linux.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"pty_unsupported.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"pty_unsupported.go",
],
"//conditions:default": [],
}),
],
importpath = "k8s.io/kubernetes/pkg/kubelet/container",
visibility = ["//visibility:public"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/kubelet/apis/cri/runtime/v1alpha2:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/kubelet/util/ioutils:go_default_library",
"//pkg/util/hash:go_default_library",
"//pkg/volume:go_default_library",
"//third_party/forked/golang/expansion:go_default_library",
@ -71,12 +35,7 @@ go_library(
"//vendor/k8s.io/client-go/tools/reference:go_default_library",
"//vendor/k8s.io/client-go/tools/remotecommand:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/kr/pty:go_default_library",
],
"//conditions:default": [],
}),
],
)
go_test(
@ -89,7 +48,6 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",

View File

@ -17,12 +17,9 @@ limitations under the License.
package container
import (
"bytes"
"fmt"
"hash/adler32"
"hash/fnv"
"strings"
"time"
"github.com/golang/glog"
@ -33,7 +30,6 @@ import (
"k8s.io/client-go/tools/record"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"k8s.io/kubernetes/third_party/forked/golang/expansion"
)
@ -46,7 +42,7 @@ type HandlerRunner interface {
// RuntimeHelper wraps kubelet to make container runtime
// able to get necessary information like the RunContainerOptions, DNS settings, Host IP.
type RuntimeHelper interface {
GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, err error)
GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, cleanupAction func(), err error)
GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error)
// GetPodCgroupParent returns the CgroupName identifier, and its literal cgroupfs form on the host
// of a pod.
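The new cleanupAction return value in GenerateRunContainerOptions above implies a deferred-release pattern on the caller side; a hedged sketch (caller code invented):

	opts, cleanup, err := helper.GenerateRunContainerOptions(pod, container, podIP)
	if err != nil {
		return err
	}
	if cleanup != nil {
		// Release whatever was prepared while building the options once
		// the container start attempt finishes.
		defer cleanup()
	}
	_ = opts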
@ -100,17 +96,6 @@ func HashContainer(container *v1.Container) uint64 {
return uint64(hash.Sum32())
}
// HashContainerLegacy returns the hash of the container. It is used to compare
// the running container with its desired spec.
// This is used by rktnetes and dockershim (for handling <=1.5 containers).
// TODO: Remove this function when kubernetes version is >=1.8 AND rktnetes
// update its hash function.
func HashContainerLegacy(container *v1.Container) uint64 {
hash := adler32.New()
hashutil.DeepHashObject(hash, *container)
return uint64(hash.Sum32())
}
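With the adler32 legacy path removed, the FNV-based HashContainer is the single spec hash; a small sketch of how such a hash flags spec drift (values illustrative):

	before := HashContainer(container)
	container.Image = "nginx:1.15" // any change to the container spec
	after := HashContainer(container)
	if before != after {
		// A changed hash is the signal that the running container no
		// longer matches its desired spec.
	}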
// EnvVarsToMap constructs a map of environment name to value from a slice
// of env vars.
func EnvVarsToMap(envs []EnvVar) map[string]string {
@ -145,6 +130,11 @@ func ExpandContainerCommandOnlyStatic(containerCommand []string, envs []v1.EnvVa
return command
}
func ExpandContainerVolumeMounts(mount v1.VolumeMount, envs []EnvVar) (expandedSubpath string) {
mapping := expansion.MappingFuncFor(EnvVarsToMap(envs))
return expansion.Expand(mount.SubPath, mapping)
}
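For example, ExpandContainerVolumeMounts resolves $(VAR) references in a subpath (values invented):

	envs := []EnvVar{{Name: "POD_NAME", Value: "web-0"}}
	mount := v1.VolumeMount{Name: "data", MountPath: "/data", SubPath: "$(POD_NAME)/logs"}
	expanded := ExpandContainerVolumeMounts(mount, envs)
	// expanded == "web-0/logs"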
func ExpandContainerCommandAndArgs(container *v1.Container, envs []EnvVar) (command []string, args []string) {
mapping := expansion.MappingFuncFor(EnvVarsToMap(envs))
@ -205,6 +195,13 @@ func (irecorder *innerEventRecorder) PastEventf(object runtime.Object, timestamp
}
}
func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)
}
}
// Pod must not be nil.
func IsHostNetworkPod(pod *v1.Pod) bool {
return pod.Spec.HostNetwork
@ -265,26 +262,6 @@ func FormatPod(pod *Pod) string {
return fmt.Sprintf("%s_%s(%s)", pod.Name, pod.Namespace, pod.ID)
}
type containerCommandRunnerWrapper struct {
DirectStreamingRuntime
}
var _ ContainerCommandRunner = &containerCommandRunnerWrapper{}
func DirectStreamingRunner(runtime DirectStreamingRuntime) ContainerCommandRunner {
return &containerCommandRunnerWrapper{runtime}
}
func (r *containerCommandRunnerWrapper) RunInContainer(id ContainerID, cmd []string, timeout time.Duration) ([]byte, error) {
var buffer bytes.Buffer
output := ioutils.WriteCloserWrapper(&buffer)
err := r.ExecInContainer(id, cmd, nil, output, output, false, nil, timeout)
// Even if err is non-nil, there still may be output (e.g. the exec wrote to stdout or stderr but
// the command returned a nonzero exit code). Therefore, always return the output along with the
// error.
return buffer.Bytes(), err
}
// GetContainerSpec gets the container spec by containerName.
func GetContainerSpec(pod *v1.Pod, containerName string) *v1.Container {
for i, c := range pod.Spec.Containers {
@ -312,21 +289,6 @@ func HasPrivilegedContainer(pod *v1.Pod) bool {
return false
}
// MakeCapabilities creates string slices from Capability slices
func MakeCapabilities(capAdd []v1.Capability, capDrop []v1.Capability) ([]string, []string) {
var (
addCaps []string
dropCaps []string
)
for _, cap := range capAdd {
addCaps = append(addCaps, string(cap))
}
for _, cap := range capDrop {
dropCaps = append(dropCaps, string(cap))
}
return addCaps, dropCaps
}
// MakePortMappings creates internal port mapping from api port mapping.
func MakePortMappings(container *v1.Container) (ports []PortMapping) {
names := make(map[string]struct{})

Some files were not shown because too many files have changed in this diff.