Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-13 18:43:34 +00:00)

rebase: update K8s packages to v0.32.1

Update K8s packages in go.mod to v0.32.1

Signed-off-by: Praveen M <m.praveen@ibm.com>
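The go.mod changes themselves are not shown on this page. As a rough, illustrative sketch only (module list assumed, not taken from this diff), a bump of the Kubernetes dependencies to v0.32.1 usually pins the staging k8s.io/* modules at v0.32.1 and k8s.io/kubernetes itself at the corresponding v1.32.1 tag:

require (
	k8s.io/api v0.32.1
	k8s.io/apimachinery v0.32.1
	k8s.io/client-go v0.32.1
	k8s.io/kubernetes v1.32.1 // illustrative; this is what pulls in vendored packages such as pkg/kubelet/apis/config below
)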
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/OWNERS (generated, vendored, new file, 9 lines added)
@@ -0,0 +1,9 @@
# See the OWNERS docs at https://go.k8s.io/owners

# Disable inheritance as this is an api owners file
options:
  no_parent_owners: true
approvers:
  - api-approvers
reviewers:
  - sig-node-api-reviewers
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/doc.go (generated, vendored, new file, 20 lines added)
@@ -0,0 +1,20 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// +k8s:deepcopy-gen=package
// +groupName=kubelet.config.k8s.io

package config // import "k8s.io/kubernetes/pkg/kubelet/apis/config"
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/helpers.go (generated, vendored, new file, 32 lines added)
@@ -0,0 +1,32 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

// KubeletConfigurationPathRefs returns pointers to all of the KubeletConfiguration fields that contain filepaths.
// You might use this, for example, to resolve all relative paths against some common root before
// passing the configuration to the application. This method must be kept up to date as new fields are added.
func KubeletConfigurationPathRefs(kc *KubeletConfiguration) []*string {
	paths := []*string{}
	paths = append(paths, &kc.StaticPodPath)
	paths = append(paths, &kc.Authentication.X509.ClientCAFile)
	paths = append(paths, &kc.TLSCertFile)
	paths = append(paths, &kc.TLSPrivateKeyFile)
	paths = append(paths, &kc.ResolverConfig)
	paths = append(paths, &kc.VolumePluginDir)
	paths = append(paths, &kc.PodLogsDir)
	return paths
}
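As a usage note (not part of this diff): the sketch below, assuming the vendored package is imported as kubeletconfig, shows how a caller might use KubeletConfigurationPathRefs from helpers.go above to resolve every relative path in a KubeletConfiguration against a common root directory.

package main

import (
	"fmt"
	"path/filepath"

	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

// resolvePaths joins every non-empty, relative path field onto root, in place.
func resolvePaths(kc *kubeletconfig.KubeletConfiguration, root string) {
	for _, p := range kubeletconfig.KubeletConfigurationPathRefs(kc) {
		if *p == "" || filepath.IsAbs(*p) {
			continue
		}
		*p = filepath.Join(root, *p)
	}
}

func main() {
	kc := &kubeletconfig.KubeletConfiguration{StaticPodPath: "manifests"}
	resolvePaths(kc, "/etc/kubernetes")
	fmt.Println(kc.StaticPodPath) // prints /etc/kubernetes/manifests
}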
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/register.go (generated, vendored, new file, 45 lines added)
@@ -0,0 +1,45 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// GroupName is the group name used in this package
const GroupName = "kubelet.config.k8s.io"

// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}

var (
	// SchemeBuilder is the scheme builder with scheme init functions to run for this API package
	SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
	// AddToScheme is a global function that registers this API group & version to a scheme
	AddToScheme = SchemeBuilder.AddToScheme
)

// addKnownTypes registers known types to the given scheme
func addKnownTypes(scheme *runtime.Scheme) error {
	scheme.AddKnownTypes(SchemeGroupVersion,
		&KubeletConfiguration{},
		&SerializedNodeConfigSource{},
		&CredentialProviderConfig{},
	)
	return nil
}
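For orientation (not part of this diff): register.go above wires the internal kubelet config types into a runtime.SchemeBuilder. A minimal sketch of how a consumer could build a scheme with these types, using only the exported AddToScheme shown above:

package main

import (
	"k8s.io/apimachinery/pkg/runtime"

	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

// newKubeletConfigScheme returns a scheme that knows about KubeletConfiguration,
// SerializedNodeConfigSource and CredentialProviderConfig in their internal version.
func newKubeletConfigScheme() (*runtime.Scheme, error) {
	scheme := runtime.NewScheme()
	if err := kubeletconfig.AddToScheme(scheme); err != nil {
		return nil, err
	}
	return scheme, nil
}

func main() {
	if _, err := newKubeletConfigScheme(); err != nil {
		panic(err)
	}
}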
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/types.go (generated, vendored, new file, 704 lines added)
@@ -0,0 +1,704 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
logsapi "k8s.io/component-base/logs/api/v1"
|
||||
tracingapi "k8s.io/component-base/tracing/api/v1"
|
||||
)
|
||||
|
||||
// HairpinMode denotes how the kubelet should configure networking to handle
|
||||
// hairpin packets.
|
||||
type HairpinMode string
|
||||
|
||||
// Enum settings for different ways to handle hairpin packets.
|
||||
const (
|
||||
// Set the hairpin flag on the veth of containers in the respective
|
||||
// container runtime.
|
||||
HairpinVeth = "hairpin-veth"
|
||||
// Make the container bridge promiscuous. This will force it to accept
|
||||
// hairpin packets, even if the flag isn't set on ports of the bridge.
|
||||
PromiscuousBridge = "promiscuous-bridge"
|
||||
// Neither of the above. If the kubelet is started in this hairpin mode
|
||||
// and kube-proxy is running in iptables mode, hairpin packets will be
|
||||
// dropped by the container bridge.
|
||||
HairpinNone = "none"
|
||||
)
|
||||
|
||||
// ResourceChangeDetectionStrategy denotes a mode in which internal
|
||||
// managers (secret, configmap) are discovering object changes.
|
||||
type ResourceChangeDetectionStrategy string
|
||||
|
||||
// Enum settings for different strategies of kubelet managers.
|
||||
const (
|
||||
// GetChangeDetectionStrategy is a mode in which kubelet fetches
|
||||
// necessary objects directly from apiserver.
|
||||
GetChangeDetectionStrategy ResourceChangeDetectionStrategy = "Get"
|
||||
// TTLCacheChangeDetectionStrategy is a mode in which kubelet uses
|
||||
// ttl cache for object directly fetched from apiserver.
|
||||
TTLCacheChangeDetectionStrategy ResourceChangeDetectionStrategy = "Cache"
|
||||
// WatchChangeDetectionStrategy is a mode in which kubelet uses
|
||||
// watches to observe changes to objects that are in its interest.
|
||||
WatchChangeDetectionStrategy ResourceChangeDetectionStrategy = "Watch"
|
||||
// RestrictedTopologyManagerPolicy is a mode in which kubelet only allows
|
||||
// pods with optimal NUMA node alignment for requested resources
|
||||
RestrictedTopologyManagerPolicy = "restricted"
|
||||
// BestEffortTopologyManagerPolicy is a mode in which kubelet will favour
|
||||
// pods with NUMA alignment of CPU and device resources.
|
||||
BestEffortTopologyManagerPolicy = "best-effort"
|
||||
// NoneTopologyManagerPolicy is a mode in which kubelet has no knowledge
|
||||
// of NUMA alignment of a pod's CPU and device resources.
|
||||
NoneTopologyManagerPolicy = "none"
|
||||
// SingleNumaNodeTopologyManagerPolicy is a mode in which kubelet only allows
|
||||
// pods with a single NUMA alignment of CPU and device resources.
|
||||
SingleNumaNodeTopologyManagerPolicy = "single-numa-node"
|
||||
// ContainerTopologyManagerScope represents that
|
||||
// topology policy is applied on a per-container basis.
|
||||
ContainerTopologyManagerScope = "container"
|
||||
// PodTopologyManagerScope represents that
|
||||
// topology policy is applied on a per-pod basis.
|
||||
PodTopologyManagerScope = "pod"
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// KubeletConfiguration contains the configuration for the Kubelet
|
||||
type KubeletConfiguration struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// enableServer enables Kubelet's secured server.
|
||||
// Note: Kubelet's insecure port is controlled by the readOnlyPort option.
|
||||
EnableServer bool
|
||||
// staticPodPath is the path to the directory containing local (static) pods to
|
||||
// run, or the path to a single static pod file.
|
||||
StaticPodPath string
|
||||
// podLogsDir is a custom root directory path kubelet will use to place pod's log files.
|
||||
// Default: "/var/log/pods/"
|
||||
// Note: it is not recommended to use the temp folder as a log directory as it may cause
|
||||
// unexpected behavior in many places.
|
||||
PodLogsDir string
|
||||
// syncFrequency is the max period between synchronizing running
|
||||
// containers and config
|
||||
SyncFrequency metav1.Duration
|
||||
// fileCheckFrequency is the duration between checking config files for
|
||||
// new data
|
||||
FileCheckFrequency metav1.Duration
|
||||
// httpCheckFrequency is the duration between checking http for new data
|
||||
HTTPCheckFrequency metav1.Duration
|
||||
// staticPodURL is the URL for accessing static pods to run
|
||||
StaticPodURL string
|
||||
// staticPodURLHeader is a map of slices with HTTP headers to use when accessing the podURL
|
||||
StaticPodURLHeader map[string][]string `datapolicy:"token"`
|
||||
// address is the IP address for the Kubelet to serve on (set to 0.0.0.0
|
||||
// for all interfaces)
|
||||
Address string
|
||||
// port is the port for the Kubelet to serve on.
|
||||
Port int32
|
||||
// readOnlyPort is the read-only port for the Kubelet to serve on with
|
||||
// no authentication/authorization (set to 0 to disable)
|
||||
ReadOnlyPort int32
|
||||
// volumePluginDir is the full path of the directory in which to search
|
||||
// for additional third party volume plugins.
|
||||
VolumePluginDir string
|
||||
// providerID, if set, sets the unique id of the instance that an external provider (i.e. cloudprovider)
|
||||
// can use to identify a specific node
|
||||
ProviderID string
|
||||
// tlsCertFile is the file containing x509 Certificate for HTTPS. (CA cert,
|
||||
// if any, concatenated after server cert). If tlsCertFile and
|
||||
// tlsPrivateKeyFile are not provided, a self-signed certificate
|
||||
// and key are generated for the public address and saved to the directory
|
||||
// passed to the Kubelet's --cert-dir flag.
|
||||
TLSCertFile string
|
||||
// tlsPrivateKeyFile is the file containing x509 private key matching tlsCertFile
|
||||
TLSPrivateKeyFile string
|
||||
// TLSCipherSuites is the list of allowed cipher suites for the server.
|
||||
// Note that TLS 1.3 ciphersuites are not configurable.
|
||||
// Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).
|
||||
TLSCipherSuites []string
|
||||
// TLSMinVersion is the minimum TLS version supported.
|
||||
// Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).
|
||||
TLSMinVersion string
|
||||
// rotateCertificates enables client certificate rotation. The Kubelet will request a
|
||||
// new certificate from the certificates.k8s.io API. This requires an approver to approve the
|
||||
// certificate signing requests.
|
||||
RotateCertificates bool
|
||||
// serverTLSBootstrap enables server certificate bootstrap. Instead of self
|
||||
// signing a serving certificate, the Kubelet will request a certificate from
|
||||
// the certificates.k8s.io API. This requires an approver to approve the
|
||||
// certificate signing requests. The RotateKubeletServerCertificate feature
|
||||
// must be enabled.
|
||||
ServerTLSBootstrap bool
|
||||
// authentication specifies how requests to the Kubelet's server are authenticated
|
||||
Authentication KubeletAuthentication
|
||||
// authorization specifies how requests to the Kubelet's server are authorized
|
||||
Authorization KubeletAuthorization
|
||||
// registryPullQPS is the limit of registry pulls per second.
|
||||
// Set to 0 for no limit.
|
||||
RegistryPullQPS int32
|
||||
// registryBurst is the maximum size of bursty pulls, temporarily allows
|
||||
// pulls to burst to this number, while still not exceeding registryPullQPS.
|
||||
// Only used if registryPullQPS > 0.
|
||||
RegistryBurst int32
|
||||
// eventRecordQPS is the maximum event creations per second. If 0, there
|
||||
// is no limit enforced.
|
||||
EventRecordQPS int32
|
||||
// eventBurst is the maximum size of a burst of event creations, temporarily
|
||||
// allows event creations to burst to this number, while still not exceeding
|
||||
// eventRecordQPS. Only used if eventRecordQPS > 0.
|
||||
EventBurst int32
|
||||
// enableDebuggingHandlers enables server endpoints for log collection
|
||||
// and local running of containers and commands
|
||||
EnableDebuggingHandlers bool
|
||||
// enableContentionProfiling enables block profiling, if enableDebuggingHandlers is true.
|
||||
EnableContentionProfiling bool
|
||||
// healthzPort is the port of the localhost healthz endpoint (set to 0 to disable)
|
||||
HealthzPort int32
|
||||
// healthzBindAddress is the IP address for the healthz server to serve on
|
||||
HealthzBindAddress string
|
||||
// oomScoreAdj is the oom-score-adj value for the kubelet process. Values
|
||||
// must be within the range [-1000, 1000].
|
||||
OOMScoreAdj int32
|
||||
// clusterDomain is the DNS domain for this cluster. If set, kubelet will
|
||||
// configure all containers to search this domain in addition to the
|
||||
// host's search domains.
|
||||
ClusterDomain string
|
||||
// clusterDNS is a list of IP addresses for a cluster DNS server. If set,
|
||||
// kubelet will configure all containers to use this for DNS resolution
|
||||
// instead of the host's DNS servers.
|
||||
ClusterDNS []string
|
||||
// streamingConnectionIdleTimeout is the maximum time a streaming connection
|
||||
// can be idle before the connection is automatically closed.
|
||||
StreamingConnectionIdleTimeout metav1.Duration
|
||||
// nodeStatusUpdateFrequency is the frequency that kubelet computes node
|
||||
// status. If node lease feature is not enabled, it is also the frequency that
|
||||
// kubelet posts node status to master. In that case, be cautious when
|
||||
// changing the constant, it must work with nodeMonitorGracePeriod in nodecontroller.
|
||||
NodeStatusUpdateFrequency metav1.Duration
|
||||
// nodeStatusReportFrequency is the frequency that kubelet posts node
|
||||
// status to master if node status does not change. Kubelet will ignore this
|
||||
// frequency and post node status immediately if any change is detected. It is
|
||||
// only used when node lease feature is enabled.
|
||||
NodeStatusReportFrequency metav1.Duration
|
||||
// nodeLeaseDurationSeconds is the duration the Kubelet will set on its corresponding Lease.
|
||||
NodeLeaseDurationSeconds int32
|
||||
// ImageMinimumGCAge is the minimum age for an unused image before it is
|
||||
// garbage collected.
|
||||
ImageMinimumGCAge metav1.Duration
|
||||
// ImageMaximumGCAge is the maximum age an image can be unused before it is garbage collected.
|
||||
// The default of this field is "0s", which disables this field--meaning images won't be garbage
|
||||
// collected based on being unused for too long.
|
||||
ImageMaximumGCAge metav1.Duration
|
||||
// imageGCHighThresholdPercent is the percent of disk usage after which
|
||||
// image garbage collection is always run. The percent is calculated as
|
||||
// this field value out of 100.
|
||||
ImageGCHighThresholdPercent int32
|
||||
// imageGCLowThresholdPercent is the percent of disk usage before which
|
||||
// image garbage collection is never run. Lowest disk usage to garbage
|
||||
// collect to. The percent is calculated as this field value out of 100.
|
||||
ImageGCLowThresholdPercent int32
|
||||
// How frequently to calculate and cache volume disk usage for all pods
|
||||
VolumeStatsAggPeriod metav1.Duration
|
||||
// KubeletCgroups is the absolute name of cgroups to isolate the kubelet in
|
||||
KubeletCgroups string
|
||||
// SystemCgroups is the absolute name of cgroups in which to place
|
||||
// all non-kernel processes that are not already in a container. Empty
|
||||
// for no container. Rolling back the flag requires a reboot.
|
||||
SystemCgroups string
|
||||
// CgroupRoot is the root cgroup to use for pods.
|
||||
// If CgroupsPerQOS is enabled, this is the root of the QoS cgroup hierarchy.
|
||||
CgroupRoot string
|
||||
// Enable QoS based Cgroup hierarchy: top level cgroups for QoS Classes
|
||||
// And all Burstable and BestEffort pods are brought up under their
|
||||
// specific top level QoS cgroup.
|
||||
CgroupsPerQOS bool
|
||||
// driver that the kubelet uses to manipulate cgroups on the host (cgroupfs or systemd)
|
||||
CgroupDriver string
|
||||
// SingleProcessOOMKill, if true, will prevent the `memory.oom.group` flag from being set for container
|
||||
// cgroups in cgroups v2. This causes processes in the container to be OOM killed individually instead of as
|
||||
// a group. It means that if true, the behavior aligns with the behavior of cgroups v1.
|
||||
SingleProcessOOMKill *bool
|
||||
// CPUManagerPolicy is the name of the policy to use.
|
||||
// Requires the CPUManager feature gate to be enabled.
|
||||
CPUManagerPolicy string
|
||||
// CPUManagerPolicyOptions is a set of key=value which allows to set extra options
|
||||
// to fine tune the behaviour of the cpu manager policies.
|
||||
// Requires both the "CPUManager" and "CPUManagerPolicyOptions" feature gates to be enabled.
|
||||
CPUManagerPolicyOptions map[string]string
|
||||
// CPU Manager reconciliation period.
|
||||
// Requires the CPUManager feature gate to be enabled.
|
||||
CPUManagerReconcilePeriod metav1.Duration
|
||||
// MemoryManagerPolicy is the name of the policy to use.
|
||||
// Requires the MemoryManager feature gate to be enabled.
|
||||
MemoryManagerPolicy string
|
||||
// TopologyManagerPolicy is the name of the policy to use.
|
||||
TopologyManagerPolicy string
|
||||
// TopologyManagerScope represents the scope of topology hint generation
|
||||
// that topology manager requests and hint providers generate.
|
||||
// Default: "container"
|
||||
// +optional
|
||||
TopologyManagerScope string
|
||||
// TopologyManagerPolicyOptions is a set of key=value which allows to set extra options
|
||||
// to fine tune the behaviour of the topology manager policies.
|
||||
// Requires both the "TopologyManager" and "TopologyManagerPolicyOptions" feature gates to be enabled.
|
||||
TopologyManagerPolicyOptions map[string]string
|
||||
// Map of QoS resource reservation percentages (memory only for now).
|
||||
// Requires the QOSReserved feature gate to be enabled.
|
||||
QOSReserved map[string]string
|
||||
// runtimeRequestTimeout is the timeout for all runtime requests except long running
|
||||
// requests - pull, logs, exec and attach.
|
||||
RuntimeRequestTimeout metav1.Duration
|
||||
// hairpinMode specifies how the Kubelet should configure the container
|
||||
// bridge for hairpin packets.
|
||||
// Setting this flag allows endpoints in a Service to loadbalance back to
|
||||
// themselves if they should try to access their own Service. Values:
|
||||
// "promiscuous-bridge": make the container bridge promiscuous.
|
||||
// "hairpin-veth": set the hairpin flag on container veth interfaces.
|
||||
// "none": do nothing.
|
||||
// Generally, one must set --hairpin-mode=hairpin-veth to achieve hairpin NAT,
|
||||
// because promiscuous-bridge assumes the existence of a container bridge named cbr0.
|
||||
HairpinMode string
|
||||
// maxPods is the number of pods that can run on this Kubelet.
|
||||
MaxPods int32
|
||||
// The CIDR to use for pod IP addresses, only used in standalone mode.
|
||||
// In cluster mode, this is obtained from the master.
|
||||
PodCIDR string
|
||||
// The maximum number of processes per pod. If -1, the kubelet defaults to the node allocatable pid capacity.
|
||||
PodPidsLimit int64
|
||||
// ResolverConfig is the resolver configuration file used as the basis
|
||||
// for the container DNS resolution configuration.
|
||||
ResolverConfig string
|
||||
// RunOnce causes the Kubelet to check the API server once for pods,
|
||||
// run those in addition to the pods specified by static pod files, and exit.
|
||||
// Deprecated: no longer has any effect.
|
||||
RunOnce bool
|
||||
// cpuCFSQuota enables CPU CFS quota enforcement for containers that
|
||||
// specify CPU limits
|
||||
CPUCFSQuota bool
|
||||
// CPUCFSQuotaPeriod sets the CPU CFS quota period value, cpu.cfs_period_us, defaults to 100ms
|
||||
CPUCFSQuotaPeriod metav1.Duration
|
||||
// maxOpenFiles is the number of files that can be opened by the Kubelet process.
|
||||
MaxOpenFiles int64
|
||||
// nodeStatusMaxImages caps the number of images reported in Node.Status.Images.
|
||||
NodeStatusMaxImages int32
|
||||
// contentType is contentType of requests sent to apiserver.
|
||||
ContentType string
|
||||
// kubeAPIQPS is the QPS to use while talking with kubernetes apiserver
|
||||
KubeAPIQPS int32
|
||||
// kubeAPIBurst is the burst to allow while talking with kubernetes
|
||||
// apiserver
|
||||
KubeAPIBurst int32
|
||||
// serializeImagePulls when enabled, tells the Kubelet to pull images one at a time.
|
||||
SerializeImagePulls bool
|
||||
// MaxParallelImagePulls sets the maximum number of image pulls in parallel.
|
||||
MaxParallelImagePulls *int32
|
||||
// Map of signal names to quantities that defines hard eviction thresholds. For example: {"memory.available": "300Mi"}.
|
||||
// Some default signals are Linux only: nodefs.inodesFree
|
||||
EvictionHard map[string]string
|
||||
// Map of signal names to quantities that defines soft eviction thresholds. For example: {"memory.available": "300Mi"}.
|
||||
EvictionSoft map[string]string
|
||||
// Map of signal names to quantities that defines grace periods for each soft eviction signal. For example: {"memory.available": "30s"}.
|
||||
EvictionSoftGracePeriod map[string]string
|
||||
// Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.
|
||||
EvictionPressureTransitionPeriod metav1.Duration
|
||||
// Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
|
||||
EvictionMaxPodGracePeriod int32
|
||||
// Map of signal names to quantities that defines minimum reclaims, which describe the minimum
|
||||
// amount of a given resource the kubelet will reclaim when performing a pod eviction while
|
||||
// that resource is under pressure. For example: {"imagefs.available": "2Gi"}
|
||||
EvictionMinimumReclaim map[string]string
|
||||
// podsPerCore is the maximum number of pods per core. Cannot exceed MaxPods.
|
||||
// If 0, this field is ignored.
|
||||
PodsPerCore int32
|
||||
// enableControllerAttachDetach enables the Attach/Detach controller to
|
||||
// manage attachment/detachment of volumes scheduled to this node, and
|
||||
// disables kubelet from executing any attach/detach operations
|
||||
EnableControllerAttachDetach bool
|
||||
// protectKernelDefaults, if true, causes the Kubelet to error if kernel
|
||||
// flags are not as it expects. Otherwise the Kubelet will attempt to modify
|
||||
// kernel flags to match its expectation.
|
||||
ProtectKernelDefaults bool
|
||||
// If true, Kubelet creates the KUBE-IPTABLES-HINT chain in iptables as a hint to
|
||||
// other components about the configuration of iptables on the system.
|
||||
MakeIPTablesUtilChains bool
|
||||
// iptablesMasqueradeBit formerly controlled the creation of the KUBE-MARK-MASQ
|
||||
// chain.
|
||||
// Deprecated: no longer has any effect.
|
||||
IPTablesMasqueradeBit int32
|
||||
// iptablesDropBit formerly controlled the creation of the KUBE-MARK-DROP chain.
|
||||
// Deprecated: no longer has any effect.
|
||||
IPTablesDropBit int32
|
||||
// featureGates is a map of feature names to bools that enable or disable alpha/experimental
|
||||
// features. This field modifies piecemeal the built-in default values from
|
||||
// "k8s.io/kubernetes/pkg/features/kube_features.go".
|
||||
FeatureGates map[string]bool
|
||||
// Tells the Kubelet to fail to start if swap is enabled on the node.
|
||||
FailSwapOn bool
|
||||
// memorySwap configures swap memory available to container workloads.
|
||||
// +featureGate=NodeSwap
|
||||
// +optional
|
||||
MemorySwap MemorySwapConfiguration
|
||||
// A quantity defines the maximum size of the container log file before it is rotated. For example: "5Mi" or "256Ki".
|
||||
ContainerLogMaxSize string
|
||||
// Maximum number of container log files that can be present for a container.
|
||||
ContainerLogMaxFiles int32
|
||||
// Maximum number of concurrent log rotation workers to spawn for processing the log rotation
|
||||
// requests
|
||||
ContainerLogMaxWorkers int32
|
||||
// Interval at which the container logs are monitored for rotation
|
||||
ContainerLogMonitorInterval metav1.Duration
|
||||
// ConfigMapAndSecretChangeDetectionStrategy is a mode in which config map and secret managers are running.
|
||||
ConfigMapAndSecretChangeDetectionStrategy ResourceChangeDetectionStrategy
|
||||
// A comma separated allowlist of unsafe sysctls or sysctl patterns (ending in `*`).
|
||||
// Unsafe sysctl groups are `kernel.shm*`, `kernel.msg*`, `kernel.sem`, `fs.mqueue.*`, and `net.*`.
|
||||
// These sysctls are namespaced but not allowed by default.
|
||||
// For example: "`kernel.msg*,net.ipv4.route.min_pmtu`"
|
||||
// +optional
|
||||
AllowedUnsafeSysctls []string
|
||||
// kernelMemcgNotification if enabled, the kubelet will integrate with the kernel memcg
|
||||
// notification to determine if memory eviction thresholds are crossed rather than polling.
|
||||
KernelMemcgNotification bool
|
||||
|
||||
/* the following fields are meant for Node Allocatable */
|
||||
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G,ephemeral-storage=1G,pid=100) pairs
|
||||
// that describe resources reserved for non-kubernetes components.
|
||||
// Currently only cpu, memory and local ephemeral storage for root file system are supported.
|
||||
// See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources for more detail.
|
||||
SystemReserved map[string]string
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G,ephemeral-storage=1G,pid=100) pairs
|
||||
// that describe resources reserved for kubernetes system components.
|
||||
// Currently only cpu, memory and local ephemeral storage for root file system are supported.
|
||||
// See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources for more detail.
|
||||
KubeReserved map[string]string
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
|
||||
// Refer to [Node Allocatable](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable) doc for more information.
|
||||
SystemReservedCgroup string
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
|
||||
// Refer to [Node Allocatable](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable) doc for more information.
|
||||
KubeReservedCgroup string
|
||||
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
|
||||
// This flag accepts a list of options. Acceptable options are `pods`, `system-reserved` & `kube-reserved`.
|
||||
// Refer to [Node Allocatable](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable) doc for more information.
|
||||
EnforceNodeAllocatable []string
|
||||
// This option specifies the cpu list reserved for the host level system threads and kubernetes related threads.
|
||||
// This provides a "static" CPU list rather than the "dynamic" list provided by system-reserved and kube-reserved.
|
||||
// This option overwrites CPUs provided by system-reserved and kube-reserved.
|
||||
ReservedSystemCPUs string
|
||||
// The previous version for which you want to show hidden metrics.
|
||||
// Only the previous minor version is meaningful, other values will not be allowed.
|
||||
// The format is <major>.<minor>, e.g.: '1.16'.
|
||||
// The purpose of this format is to make sure you have the opportunity to notice if the next release hides additional metrics,
|
||||
// rather than being surprised when they are permanently removed in the release after that.
|
||||
ShowHiddenMetricsForVersion string
|
||||
// Logging specifies the options of logging.
|
||||
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
|
||||
Logging logsapi.LoggingConfiguration
|
||||
// EnableSystemLogHandler enables /logs handler.
|
||||
EnableSystemLogHandler bool
|
||||
// EnableSystemLogQuery enables the node log query feature on the /logs endpoint.
|
||||
// EnableSystemLogHandler has to be enabled in addition for this feature to work.
|
||||
// Enabling this feature has security implications. The recommendation is to enable it on a need basis for debugging
|
||||
// purposes and disabling otherwise.
|
||||
// +featureGate=NodeLogQuery
|
||||
// +optional
|
||||
EnableSystemLogQuery bool
|
||||
// ShutdownGracePeriod specifies the total duration that the node should delay the shutdown and total grace period for pod termination during a node shutdown.
|
||||
// Defaults to 0 seconds.
|
||||
// +featureGate=GracefulNodeShutdown
|
||||
// +optional
|
||||
ShutdownGracePeriod metav1.Duration
|
||||
// ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod.
|
||||
// Defaults to 0 seconds.
|
||||
// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods.
|
||||
// +featureGate=GracefulNodeShutdown
|
||||
// +optional
|
||||
ShutdownGracePeriodCriticalPods metav1.Duration
|
||||
// ShutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based
|
||||
// on their associated priority class value.
|
||||
// When a shutdown request is received, the Kubelet will initiate shutdown on all pods
|
||||
// running on the node with a grace period that depends on the priority of the pod,
|
||||
// and then wait for all pods to exit.
|
||||
// Each entry in the array represents the graceful shutdown time a pod with a priority
|
||||
// class value that lies in the range of that value and the next higher entry in the
|
||||
// list when the node is shutting down.
|
||||
ShutdownGracePeriodByPodPriority []ShutdownGracePeriodByPodPriority
|
||||
// ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
|
||||
// The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads.
|
||||
// For example, if you have a NUMA0 with 10Gi of memory and the ReservedMemory was specified to reserve 1Gi of memory at NUMA0,
|
||||
// the memory manager will assume that only 9Gi is available for allocation.
|
||||
// You can specify reservations for more than one NUMA node and for different memory types.
|
||||
// You can omit this parameter entirely, but you should be aware that the amount of reserved memory from all NUMA nodes
|
||||
// should be equal to the amount of memory specified by the node allocatable features(https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable).
|
||||
// If at least one node allocatable parameter has a non-zero value, you will need to specify at least one NUMA node.
|
||||
// Also, avoid specifying:
|
||||
// 1. Duplicates, the same NUMA node, and memory type, but with a different value.
|
||||
// 2. zero limits for any memory type.
|
||||
// 3. NUMA node IDs that do not exist on the machine.
|
||||
// 4. memory types except for memory and hugepages-<size>
|
||||
ReservedMemory []MemoryReservation
|
||||
// EnableProfilingHandler enables the /debug/pprof handler.
|
||||
EnableProfilingHandler bool
|
||||
// EnableDebugFlagsHandler enables the /debug/flags/v handler.
|
||||
EnableDebugFlagsHandler bool
|
||||
// SeccompDefault enables the use of `RuntimeDefault` as the default seccomp profile for all workloads.
|
||||
SeccompDefault bool
|
||||
// MemoryThrottlingFactor specifies the factor multiplied by the memory limit or node allocatable memory
|
||||
// when setting the cgroupv2 memory.high value to enforce MemoryQoS.
|
||||
// Decreasing this factor will set lower high limit for container cgroups and put heavier reclaim pressure
|
||||
// while increasing will put less reclaim pressure.
|
||||
// See https://kep.k8s.io/2570 for more details.
|
||||
// Default: 0.9
|
||||
// +featureGate=MemoryQoS
|
||||
// +optional
|
||||
MemoryThrottlingFactor *float64
|
||||
// registerWithTaints are an array of taints to add to a node object when
|
||||
// the kubelet registers itself. This only takes effect when registerNode
|
||||
// is true and upon the initial registration of the node.
|
||||
// +optional
|
||||
RegisterWithTaints []v1.Taint
|
||||
// registerNode enables automatic registration with the apiserver.
|
||||
// +optional
|
||||
RegisterNode bool
|
||||
|
||||
// Tracing specifies the versioned configuration for OpenTelemetry tracing clients.
|
||||
// See https://kep.k8s.io/2832 for more details.
|
||||
// +featureGate=KubeletTracing
|
||||
// +optional
|
||||
Tracing *tracingapi.TracingConfiguration
|
||||
|
||||
// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
|
||||
// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
|
||||
// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
|
||||
// usage from the volume exceeds the limit.
|
||||
// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
|
||||
// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
|
||||
// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
|
||||
// +optional
|
||||
LocalStorageCapacityIsolation bool
|
||||
|
||||
// ContainerRuntimeEndpoint is the endpoint of container runtime.
|
||||
// Unix domain sockets are supported on Linux, while npipe and tcp endpoints are supported on Windows.
|
||||
// Examples:'unix:///path/to/runtime.sock', 'npipe:////./pipe/runtime'
|
||||
ContainerRuntimeEndpoint string
|
||||
|
||||
// ImageServiceEndpoint is the endpoint of container image service.
|
||||
// If not specified the default value is ContainerRuntimeEndpoint
|
||||
// +optional
|
||||
ImageServiceEndpoint string
|
||||
|
||||
// FailCgroupV1 prevents the kubelet from starting on hosts
|
||||
// that use cgroup v1. By default, this is set to 'false', meaning
|
||||
// the kubelet is allowed to start on cgroup v1 hosts unless this
|
||||
// option is explicitly enabled.
|
||||
// +optional
|
||||
FailCgroupV1 bool
|
||||
|
||||
// CrashLoopBackOff contains config to modify node-level parameters for
|
||||
// container restart behavior
|
||||
// +featureGate=KubeletCrashLoopBackoffMax
|
||||
// +optional
|
||||
CrashLoopBackOff CrashLoopBackOffConfig
|
||||
}
|
||||
|
||||
// KubeletAuthorizationMode denotes the authorization mode for the kubelet
|
||||
type KubeletAuthorizationMode string
|
||||
|
||||
const (
|
||||
// KubeletAuthorizationModeAlwaysAllow authorizes all authenticated requests
|
||||
KubeletAuthorizationModeAlwaysAllow KubeletAuthorizationMode = "AlwaysAllow"
|
||||
// KubeletAuthorizationModeWebhook uses the SubjectAccessReview API to determine authorization
|
||||
KubeletAuthorizationModeWebhook KubeletAuthorizationMode = "Webhook"
|
||||
)
|
||||
|
||||
// KubeletAuthorization holds the state related to the authorization in the kubelet.
|
||||
type KubeletAuthorization struct {
|
||||
// mode is the authorization mode to apply to requests to the kubelet server.
|
||||
// Valid values are AlwaysAllow and Webhook.
|
||||
// Webhook mode uses the SubjectAccessReview API to determine authorization.
|
||||
Mode KubeletAuthorizationMode
|
||||
|
||||
// webhook contains settings related to Webhook authorization.
|
||||
Webhook KubeletWebhookAuthorization
|
||||
}
|
||||
|
||||
// KubeletWebhookAuthorization holds the state related to the Webhook
|
||||
// Authorization in the Kubelet.
|
||||
type KubeletWebhookAuthorization struct {
|
||||
// cacheAuthorizedTTL is the duration to cache 'authorized' responses from the webhook authorizer.
|
||||
CacheAuthorizedTTL metav1.Duration
|
||||
// cacheUnauthorizedTTL is the duration to cache 'unauthorized' responses from the webhook authorizer.
|
||||
CacheUnauthorizedTTL metav1.Duration
|
||||
}
|
||||
|
||||
// KubeletAuthentication holds the Kubelet authentication settings.
|
||||
type KubeletAuthentication struct {
|
||||
// x509 contains settings related to x509 client certificate authentication
|
||||
X509 KubeletX509Authentication
|
||||
// webhook contains settings related to webhook bearer token authentication
|
||||
Webhook KubeletWebhookAuthentication
|
||||
// anonymous contains settings related to anonymous authentication
|
||||
Anonymous KubeletAnonymousAuthentication
|
||||
}
|
||||
|
||||
// KubeletX509Authentication contains settings related to x509 client certificate authentication
|
||||
type KubeletX509Authentication struct {
|
||||
// clientCAFile is the path to a PEM-encoded certificate bundle. If set, any request presenting a client certificate
|
||||
// signed by one of the authorities in the bundle is authenticated with a username corresponding to the CommonName,
|
||||
// and groups corresponding to the Organization in the client certificate.
|
||||
ClientCAFile string
|
||||
}
|
||||
|
||||
// KubeletWebhookAuthentication contains settings related to webhook authentication
|
||||
type KubeletWebhookAuthentication struct {
|
||||
// enabled allows bearer token authentication backed by the tokenreviews.authentication.k8s.io API
|
||||
Enabled bool
|
||||
// cacheTTL enables caching of authentication results
|
||||
CacheTTL metav1.Duration
|
||||
}
|
||||
|
||||
// KubeletAnonymousAuthentication enables anonymous requests to the kubelet server.
|
||||
type KubeletAnonymousAuthentication struct {
|
||||
// enabled allows anonymous requests to the kubelet server.
|
||||
// Requests that are not rejected by another authentication method are treated as anonymous requests.
|
||||
// Anonymous requests have a username of system:anonymous, and a group name of system:unauthenticated.
|
||||
Enabled bool
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// SerializedNodeConfigSource allows us to serialize NodeConfigSource
|
||||
// This type is used internally by the Kubelet for tracking checkpointed dynamic configs.
|
||||
// It exists in the kubeletconfig API group because it is classified as a versioned input to the Kubelet.
|
||||
type SerializedNodeConfigSource struct {
|
||||
metav1.TypeMeta
|
||||
// Source is the source that we are serializing
|
||||
// +optional
|
||||
Source v1.NodeConfigSource
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// CredentialProviderConfig is the configuration containing information about
|
||||
// each exec credential provider. Kubelet reads this configuration from disk and enables
|
||||
// each provider as specified by the CredentialProvider type.
|
||||
type CredentialProviderConfig struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// providers is a list of credential provider plugins that will be enabled by the kubelet.
|
||||
// Multiple providers may match against a single image, in which case credentials
|
||||
// from all providers will be returned to the kubelet. If multiple providers are called
|
||||
// for a single image, the results are combined. If providers return overlapping
|
||||
// auth keys, the value from the provider earlier in this list is used.
|
||||
Providers []CredentialProvider
|
||||
}
|
||||
|
||||
// CredentialProvider represents an exec plugin to be invoked by the kubelet. The plugin is only
|
||||
// invoked when an image being pulled matches the images handled by the plugin (see matchImages).
|
||||
type CredentialProvider struct {
|
||||
// name is the required name of the credential provider. It must match the name of the
|
||||
// provider executable as seen by the kubelet. The executable must be in the kubelet's
|
||||
// bin directory (set by the --credential-provider-bin-dir flag).
|
||||
Name string
|
||||
|
||||
// matchImages is a required list of strings used to match against images in order to
|
||||
// determine if this provider should be invoked. If one of the strings matches the
|
||||
// requested image from the kubelet, the plugin will be invoked and given a chance
|
||||
// to provide credentials. Images are expected to contain the registry domain
|
||||
// and URL path.
|
||||
//
|
||||
// Each entry in matchImages is a pattern which can optionally contain a port and a path.
|
||||
// Globs can be used in the domain, but not in the port or the path. Globs are supported
|
||||
// as subdomains like `*.k8s.io` or `k8s.*.io`, and top-level-domains such as `k8s.*`.
|
||||
// Matching partial subdomains like `app*.k8s.io` is also supported. Each glob can only match
|
||||
// a single subdomain segment, so `*.io` does not match `*.k8s.io`.
|
||||
//
|
||||
// A match exists between an image and a matchImage when all of the below are true:
|
||||
// - Both contain the same number of domain parts and each part matches.
|
||||
// - The URL path of an imageMatch must be a prefix of the target image URL path.
|
||||
// - If the imageMatch contains a port, then the port must match in the image as well.
|
||||
//
|
||||
// Example values of matchImages:
|
||||
// - `123456789.dkr.ecr.us-east-1.amazonaws.com`
|
||||
// - `*.azurecr.io`
|
||||
// - `gcr.io`
|
||||
// - `*.*.registry.io`
|
||||
// - `registry.io:8080/path`
|
||||
MatchImages []string
|
||||
|
||||
// defaultCacheDuration is the default duration the plugin will cache credentials in-memory
|
||||
// if a cache duration is not provided in the plugin response. This field is required.
|
||||
DefaultCacheDuration *metav1.Duration
|
||||
|
||||
// Required input version of the exec CredentialProviderRequest. The returned CredentialProviderResponse
|
||||
// MUST use the same encoding version as the input. Current supported values are:
|
||||
// - credentialprovider.kubelet.k8s.io/v1alpha1
|
||||
// - credentialprovider.kubelet.k8s.io/v1beta1
|
||||
// - credentialprovider.kubelet.k8s.io/v1
|
||||
APIVersion string
|
||||
|
||||
// Arguments to pass to the command when executing it.
|
||||
// +optional
|
||||
Args []string
|
||||
|
||||
// Env defines additional environment variables to expose to the process. These
|
||||
// are unioned with the host's environment, as well as variables client-go uses
|
||||
// to pass arguments to the plugin.
|
||||
// +optional
|
||||
Env []ExecEnvVar
|
||||
}
|
||||
|
||||
// ExecEnvVar is used for setting environment variables when executing an exec-based
|
||||
// credential plugin.
|
||||
type ExecEnvVar struct {
|
||||
Name string
|
||||
Value string
|
||||
}
|
||||
|
||||
// MemoryReservation specifies the memory reservation of different types for each NUMA node
|
||||
type MemoryReservation struct {
|
||||
NumaNode int32
|
||||
Limits v1.ResourceList
|
||||
}
|
||||
|
||||
// ShutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based on their associated priority class value
|
||||
type ShutdownGracePeriodByPodPriority struct {
|
||||
// priority is the priority value associated with the shutdown grace period
|
||||
Priority int32
|
||||
// shutdownGracePeriodSeconds is the shutdown grace period in seconds
|
||||
ShutdownGracePeriodSeconds int64
|
||||
}
|
||||
|
||||
type MemorySwapConfiguration struct {
|
||||
// swapBehavior configures swap memory available to container workloads. May be one of
|
||||
// "", "NoSwap": workloads can not use swap, default option.
|
||||
// "LimitedSwap": workload swap usage is limited. The swap limit is proportionate to the container's memory request.
|
||||
// +featureGate=NodeSwap
|
||||
// +optional
|
||||
SwapBehavior string
|
||||
}
|
||||
|
||||
// CrashLoopBackOffConfig is used for setting configuration for this kubelet's
|
||||
// container restart behavior
|
||||
type CrashLoopBackOffConfig struct {
|
||||
// MaxContainerRestartPeriod is the maximum duration the backoff delay can accrue
|
||||
// to for container restarts, minimum 1 second, maximum 300 seconds.
|
||||
// +featureGate=KubeletCrashLoopBackOffMax
|
||||
// +optional
|
||||
MaxContainerRestartPeriod *metav1.Duration
|
||||
}
|
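As an illustration (not kubelet code and not part of this diff): the comment on ShutdownGracePeriodByPodPriority in types.go above describes a bucketing rule in which a pod receives the grace period of the highest entry whose Priority does not exceed the pod's own priority class value. A minimal sketch of that rule, assuming entries sorted by ascending Priority:

package main

import (
	"fmt"

	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

// gracePeriodFor returns the shutdown grace period for a pod priority, picking
// the last entry whose Priority threshold the pod priority meets or exceeds.
func gracePeriodFor(entries []kubeletconfig.ShutdownGracePeriodByPodPriority, podPriority int32) int64 {
	var period int64
	for _, e := range entries {
		if podPriority >= e.Priority {
			period = e.ShutdownGracePeriodSeconds
		}
	}
	return period
}

func main() {
	entries := []kubeletconfig.ShutdownGracePeriodByPodPriority{
		{Priority: 0, ShutdownGracePeriodSeconds: 60},
		{Priority: 2000000000, ShutdownGracePeriodSeconds: 20}, // e.g. system-cluster-critical and above
	}
	fmt.Println(gracePeriodFor(entries, 100))        // 60
	fmt.Println(gracePeriodFor(entries, 2000001000)) // 20
}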
vendor/k8s.io/kubernetes/pkg/kubelet/apis/config/zz_generated.deepcopy.go (generated, vendored, new file, 508 lines added)
@@ -0,0 +1,508 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Code generated by deepcopy-gen. DO NOT EDIT.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
apiv1 "k8s.io/component-base/tracing/api/v1"
|
||||
)
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *CrashLoopBackOffConfig) DeepCopyInto(out *CrashLoopBackOffConfig) {
|
||||
*out = *in
|
||||
if in.MaxContainerRestartPeriod != nil {
|
||||
in, out := &in.MaxContainerRestartPeriod, &out.MaxContainerRestartPeriod
|
||||
*out = new(v1.Duration)
|
||||
**out = **in
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopBackOffConfig.
|
||||
func (in *CrashLoopBackOffConfig) DeepCopy() *CrashLoopBackOffConfig {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(CrashLoopBackOffConfig)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *CredentialProvider) DeepCopyInto(out *CredentialProvider) {
|
||||
*out = *in
|
||||
if in.MatchImages != nil {
|
||||
in, out := &in.MatchImages, &out.MatchImages
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.DefaultCacheDuration != nil {
|
||||
in, out := &in.DefaultCacheDuration, &out.DefaultCacheDuration
|
||||
*out = new(v1.Duration)
|
||||
**out = **in
|
||||
}
|
||||
if in.Args != nil {
|
||||
in, out := &in.Args, &out.Args
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.Env != nil {
|
||||
in, out := &in.Env, &out.Env
|
||||
*out = make([]ExecEnvVar, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CredentialProvider.
|
||||
func (in *CredentialProvider) DeepCopy() *CredentialProvider {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(CredentialProvider)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *CredentialProviderConfig) DeepCopyInto(out *CredentialProviderConfig) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.Providers != nil {
|
||||
in, out := &in.Providers, &out.Providers
|
||||
*out = make([]CredentialProvider, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CredentialProviderConfig.
|
||||
func (in *CredentialProviderConfig) DeepCopy() *CredentialProviderConfig {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(CredentialProviderConfig)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *CredentialProviderConfig) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ExecEnvVar) DeepCopyInto(out *ExecEnvVar) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExecEnvVar.
|
||||
func (in *ExecEnvVar) DeepCopy() *ExecEnvVar {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ExecEnvVar)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *KubeletAnonymousAuthentication) DeepCopyInto(out *KubeletAnonymousAuthentication) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletAnonymousAuthentication.
|
||||
func (in *KubeletAnonymousAuthentication) DeepCopy() *KubeletAnonymousAuthentication {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(KubeletAnonymousAuthentication)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *KubeletAuthentication) DeepCopyInto(out *KubeletAuthentication) {
|
||||
*out = *in
|
||||
out.X509 = in.X509
|
||||
out.Webhook = in.Webhook
|
||||
out.Anonymous = in.Anonymous
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletAuthentication.
func (in *KubeletAuthentication) DeepCopy() *KubeletAuthentication {
	if in == nil {
		return nil
	}
	out := new(KubeletAuthentication)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeletAuthorization) DeepCopyInto(out *KubeletAuthorization) {
	*out = *in
	out.Webhook = in.Webhook
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletAuthorization.
func (in *KubeletAuthorization) DeepCopy() *KubeletAuthorization {
	if in == nil {
		return nil
	}
	out := new(KubeletAuthorization)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	out.SyncFrequency = in.SyncFrequency
	out.FileCheckFrequency = in.FileCheckFrequency
	out.HTTPCheckFrequency = in.HTTPCheckFrequency
	if in.StaticPodURLHeader != nil {
		in, out := &in.StaticPodURLHeader, &out.StaticPodURLHeader
		*out = make(map[string][]string, len(*in))
		for key, val := range *in {
			var outVal []string
			if val == nil {
				(*out)[key] = nil
			} else {
				in, out := &val, &outVal
				*out = make([]string, len(*in))
				copy(*out, *in)
			}
			(*out)[key] = outVal
		}
	}
	if in.TLSCipherSuites != nil {
		in, out := &in.TLSCipherSuites, &out.TLSCipherSuites
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	out.Authentication = in.Authentication
	out.Authorization = in.Authorization
	if in.ClusterDNS != nil {
		in, out := &in.ClusterDNS, &out.ClusterDNS
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	out.StreamingConnectionIdleTimeout = in.StreamingConnectionIdleTimeout
	out.NodeStatusUpdateFrequency = in.NodeStatusUpdateFrequency
	out.NodeStatusReportFrequency = in.NodeStatusReportFrequency
	out.ImageMinimumGCAge = in.ImageMinimumGCAge
	out.ImageMaximumGCAge = in.ImageMaximumGCAge
	out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod
	if in.SingleProcessOOMKill != nil {
		in, out := &in.SingleProcessOOMKill, &out.SingleProcessOOMKill
		*out = new(bool)
		**out = **in
	}
	if in.CPUManagerPolicyOptions != nil {
		in, out := &in.CPUManagerPolicyOptions, &out.CPUManagerPolicyOptions
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
	if in.TopologyManagerPolicyOptions != nil {
		in, out := &in.TopologyManagerPolicyOptions, &out.TopologyManagerPolicyOptions
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.QOSReserved != nil {
		in, out := &in.QOSReserved, &out.QOSReserved
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
	out.CPUCFSQuotaPeriod = in.CPUCFSQuotaPeriod
	if in.MaxParallelImagePulls != nil {
		in, out := &in.MaxParallelImagePulls, &out.MaxParallelImagePulls
		*out = new(int32)
		**out = **in
	}
	if in.EvictionHard != nil {
		in, out := &in.EvictionHard, &out.EvictionHard
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.EvictionSoft != nil {
		in, out := &in.EvictionSoft, &out.EvictionSoft
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.EvictionSoftGracePeriod != nil {
		in, out := &in.EvictionSoftGracePeriod, &out.EvictionSoftGracePeriod
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
	if in.EvictionMinimumReclaim != nil {
		in, out := &in.EvictionMinimumReclaim, &out.EvictionMinimumReclaim
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.FeatureGates != nil {
		in, out := &in.FeatureGates, &out.FeatureGates
		*out = make(map[string]bool, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	out.MemorySwap = in.MemorySwap
	out.ContainerLogMonitorInterval = in.ContainerLogMonitorInterval
	if in.AllowedUnsafeSysctls != nil {
		in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	if in.SystemReserved != nil {
		in, out := &in.SystemReserved, &out.SystemReserved
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.KubeReserved != nil {
		in, out := &in.KubeReserved, &out.KubeReserved
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.EnforceNodeAllocatable != nil {
		in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	in.Logging.DeepCopyInto(&out.Logging)
	out.ShutdownGracePeriod = in.ShutdownGracePeriod
	out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
	if in.ShutdownGracePeriodByPodPriority != nil {
		in, out := &in.ShutdownGracePeriodByPodPriority, &out.ShutdownGracePeriodByPodPriority
		*out = make([]ShutdownGracePeriodByPodPriority, len(*in))
		copy(*out, *in)
	}
	if in.ReservedMemory != nil {
		in, out := &in.ReservedMemory, &out.ReservedMemory
		*out = make([]MemoryReservation, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
	if in.MemoryThrottlingFactor != nil {
		in, out := &in.MemoryThrottlingFactor, &out.MemoryThrottlingFactor
		*out = new(float64)
		**out = **in
	}
	if in.RegisterWithTaints != nil {
		in, out := &in.RegisterWithTaints, &out.RegisterWithTaints
		*out = make([]corev1.Taint, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
	if in.Tracing != nil {
		in, out := &in.Tracing, &out.Tracing
		*out = new(apiv1.TracingConfiguration)
		(*in).DeepCopyInto(*out)
	}
	in.CrashLoopBackOff.DeepCopyInto(&out.CrashLoopBackOff)
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletConfiguration.
func (in *KubeletConfiguration) DeepCopy() *KubeletConfiguration {
	if in == nil {
		return nil
	}
	out := new(KubeletConfiguration)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *KubeletConfiguration) DeepCopyObject() runtime.Object {
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeletWebhookAuthentication) DeepCopyInto(out *KubeletWebhookAuthentication) {
	*out = *in
	out.CacheTTL = in.CacheTTL
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletWebhookAuthentication.
func (in *KubeletWebhookAuthentication) DeepCopy() *KubeletWebhookAuthentication {
	if in == nil {
		return nil
	}
	out := new(KubeletWebhookAuthentication)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeletWebhookAuthorization) DeepCopyInto(out *KubeletWebhookAuthorization) {
	*out = *in
	out.CacheAuthorizedTTL = in.CacheAuthorizedTTL
	out.CacheUnauthorizedTTL = in.CacheUnauthorizedTTL
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletWebhookAuthorization.
func (in *KubeletWebhookAuthorization) DeepCopy() *KubeletWebhookAuthorization {
	if in == nil {
		return nil
	}
	out := new(KubeletWebhookAuthorization)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeletX509Authentication) DeepCopyInto(out *KubeletX509Authentication) {
	*out = *in
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletX509Authentication.
func (in *KubeletX509Authentication) DeepCopy() *KubeletX509Authentication {
	if in == nil {
		return nil
	}
	out := new(KubeletX509Authentication)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MemoryReservation) DeepCopyInto(out *MemoryReservation) {
	*out = *in
	if in.Limits != nil {
		in, out := &in.Limits, &out.Limits
		*out = make(corev1.ResourceList, len(*in))
		for key, val := range *in {
			(*out)[key] = val.DeepCopy()
		}
	}
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemoryReservation.
func (in *MemoryReservation) DeepCopy() *MemoryReservation {
	if in == nil {
		return nil
	}
	out := new(MemoryReservation)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MemorySwapConfiguration) DeepCopyInto(out *MemorySwapConfiguration) {
	*out = *in
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemorySwapConfiguration.
func (in *MemorySwapConfiguration) DeepCopy() *MemorySwapConfiguration {
	if in == nil {
		return nil
	}
	out := new(MemorySwapConfiguration)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SerializedNodeConfigSource) DeepCopyInto(out *SerializedNodeConfigSource) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.Source.DeepCopyInto(&out.Source)
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SerializedNodeConfigSource.
func (in *SerializedNodeConfigSource) DeepCopy() *SerializedNodeConfigSource {
	if in == nil {
		return nil
	}
	out := new(SerializedNodeConfigSource)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *SerializedNodeConfigSource) DeepCopyObject() runtime.Object {
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ShutdownGracePeriodByPodPriority) DeepCopyInto(out *ShutdownGracePeriodByPodPriority) {
	*out = *in
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ShutdownGracePeriodByPodPriority.
func (in *ShutdownGracePeriodByPodPriority) DeepCopy() *ShutdownGracePeriodByPodPriority {
	if in == nil {
		return nil
	}
	out := new(ShutdownGracePeriodByPodPriority)
	in.DeepCopyInto(out)
	return out
}
16
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/.mockery.yaml
generated
vendored
Normal file
@ -0,0 +1,16 @@
---
dir: testing
filename: "{{.InterfaceName | snakecase}}.go"
boilerplate-file: ../../../../hack/boilerplate/boilerplate.generatego.txt
outpkg: testing
with-expecter: true
packages:
  k8s.io/kubernetes/pkg/kubelet/apis/podresources:
    interfaces:
      CPUsProvider:
        config:
          filename: cpus_provider.go
      DevicesProvider:
      DynamicResourcesProvider:
      MemoryProvider:
      PodsProvider:
72
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/client.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podresources

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"k8s.io/cri-client/pkg/util"
	"k8s.io/kubelet/pkg/apis/podresources/v1"
	"k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
)

// Note: Consumers of the pod resources API should not be importing this package.
// They should copy paste the function in their project.

// GetV1alpha1Client returns a client for the PodResourcesLister grpc service
// Note: This is deprecated
func GetV1alpha1Client(socket string, connectionTimeout time.Duration, maxMsgSize int) (v1alpha1.PodResourcesListerClient, *grpc.ClientConn, error) {
	addr, dialer, err := util.GetAddressAndDialer(socket)
	if err != nil {
		return nil, nil, err
	}
	ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
	defer cancel()

	conn, err := grpc.DialContext(ctx, addr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithContextDialer(dialer),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
	if err != nil {
		return nil, nil, fmt.Errorf("error dialing socket %s: %v", socket, err)
	}
	return v1alpha1.NewPodResourcesListerClient(conn), conn, nil
}

// GetV1Client returns a client for the PodResourcesLister grpc service
func GetV1Client(socket string, connectionTimeout time.Duration, maxMsgSize int) (v1.PodResourcesListerClient, *grpc.ClientConn, error) {
	addr, dialer, err := util.GetAddressAndDialer(socket)
	if err != nil {
		return nil, nil, err
	}
	ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
	defer cancel()

	conn, err := grpc.DialContext(ctx, addr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithContextDialer(dialer),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
	if err != nil {
		return nil, nil, fmt.Errorf("error dialing socket %s: %v", socket, err)
	}
	return v1.NewPodResourcesListerClient(conn), conn, nil
}
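As the note in this file says, consumers are expected to copy these helpers into their own project rather than import the package. The following is a minimal, hypothetical sketch of such a copied consumer; the socket path, the 10-second timeout, and the 1 MiB message size are illustrative assumptions, not values mandated by this file, and GetV1Client is assumed to have been copy-pasted into the consumer's package.

// consumer_sketch.go - hypothetical consumer of the PodResourcesLister v1 API.
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	podresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
)

func main() {
	// Assumption: the well-known kubelet pod-resources socket on a typical node.
	socket := "unix:///var/lib/kubelet/pod-resources/kubelet.sock"

	// GetV1Client here is the function above, copied into this package as recommended.
	client, conn, err := GetV1Client(socket, 10*time.Second, 1024*1024)
	if err != nil {
		log.Fatalf("could not connect to the pod-resources socket: %v", err)
	}
	defer conn.Close()

	// List reports the devices, CPUs and memory assigned to each pod on the node.
	resp, err := client.List(context.Background(), &podresourcesv1.ListPodResourcesRequest{})
	if err != nil {
		log.Fatalf("List failed: %v", err)
	}
	for _, pr := range resp.GetPodResources() {
		fmt.Printf("%s/%s: %d containers\n", pr.GetNamespace(), pr.GetName(), len(pr.GetContainers()))
	}
}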
32
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/constants.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podresources

const (
	// Socket is the name of the podresources server socket
	Socket = "kubelet"

	// DefaultQPS is determined by empirically reviewing known consumers of the API.
	// It's at least unlikely that there is a legitimate need to query podresources
	// more than 100 times per second, the other subsystems are not guaranteed to react
	// so fast in the first place.
	DefaultQPS = 100

	// DefaultBurstTokens is determined by empirically reviewing known consumers of the API.
	// See the documentation of DefaultQPS, same caveats apply.
	DefaultBurstTokens = 10
)
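The QPS and burst constants above document intent rather than enforce anything by themselves. Below is a small sketch of how a consumer might honor them with client-side throttling; it is not how the kubelet itself applies these values, golang.org/x/time/rate is an assumed dependency, and the local constants simply mirror the documented defaults.

// rate_limit_sketch.go - hypothetical client-side throttling matching the defaults above.
package main

import (
	"context"

	"golang.org/x/time/rate"
)

// These mirror podresources.DefaultQPS and podresources.DefaultBurstTokens (assumption:
// the consumer copies the values rather than importing the kubelet package).
const (
	defaultQPS         = 100
	defaultBurstTokens = 10
)

// limiter allows defaultQPS requests per second with defaultBurstTokens of burst.
var limiter = rate.NewLimiter(rate.Limit(defaultQPS), defaultBurstTokens)

// throttledCall blocks until a token is available (or the context is cancelled),
// then runs the supplied podresources call.
func throttledCall(ctx context.Context, call func(context.Context) error) error {
	if err := limiter.Wait(ctx); err != nil {
		return err
	}
	return call(ctx)
}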
163
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1.go
generated
vendored
Normal file
@ -0,0 +1,163 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podresources

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	kubefeatures "k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/metrics"

	podresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
)

// v1PodResourcesServer implements PodResourcesListerServer
type v1PodResourcesServer struct {
	podsProvider             PodsProvider
	devicesProvider          DevicesProvider
	cpusProvider             CPUsProvider
	memoryProvider           MemoryProvider
	dynamicResourcesProvider DynamicResourcesProvider
}

// NewV1PodResourcesServer returns a PodResourcesListerServer which lists pods provided by the PodsProvider
// with device information provided by the DevicesProvider
func NewV1PodResourcesServer(providers PodResourcesProviders) podresourcesv1.PodResourcesListerServer {
	return &v1PodResourcesServer{
		podsProvider:             providers.Pods,
		devicesProvider:          providers.Devices,
		cpusProvider:             providers.Cpus,
		memoryProvider:           providers.Memory,
		dynamicResourcesProvider: providers.DynamicResources,
	}
}

// List returns information about the resources assigned to pods on the node
func (p *v1PodResourcesServer) List(ctx context.Context, req *podresourcesv1.ListPodResourcesRequest) (*podresourcesv1.ListPodResourcesResponse, error) {
	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
	metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc()

	pods := p.podsProvider.GetPods()
	podResources := make([]*podresourcesv1.PodResources, len(pods))
	p.devicesProvider.UpdateAllocatedDevices()

	for i, pod := range pods {
		pRes := podresourcesv1.PodResources{
			Name:       pod.Name,
			Namespace:  pod.Namespace,
			Containers: make([]*podresourcesv1.ContainerResources, 0, len(pod.Spec.Containers)),
		}

		if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SidecarContainers) {
			pRes.Containers = make([]*podresourcesv1.ContainerResources, 0, len(pod.Spec.InitContainers)+len(pod.Spec.Containers))

			for _, container := range pod.Spec.InitContainers {
				if !podutil.IsRestartableInitContainer(&container) {
					continue
				}

				pRes.Containers = append(pRes.Containers, p.getContainerResources(pod, &container))
			}
		}

		for _, container := range pod.Spec.Containers {
			pRes.Containers = append(pRes.Containers, p.getContainerResources(pod, &container))
		}
		podResources[i] = &pRes
	}

	response := &podresourcesv1.ListPodResourcesResponse{
		PodResources: podResources,
	}
	return response, nil
}

// GetAllocatableResources returns information about all the resources known by the server - this more like the capacity, not like the current amount of free resources.
func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req *podresourcesv1.AllocatableResourcesRequest) (*podresourcesv1.AllocatableResourcesResponse, error) {
	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
	metrics.PodResourcesEndpointRequestsGetAllocatableCount.WithLabelValues("v1").Inc()

	response := &podresourcesv1.AllocatableResourcesResponse{
		Devices: p.devicesProvider.GetAllocatableDevices(),
		CpuIds:  p.cpusProvider.GetAllocatableCPUs(),
		Memory:  p.memoryProvider.GetAllocatableMemory(),
	}

	return response, nil
}

// Get returns information about the resources assigned to a specific pod
func (p *v1PodResourcesServer) Get(ctx context.Context, req *podresourcesv1.GetPodResourcesRequest) (*podresourcesv1.GetPodResourcesResponse, error) {
	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
	metrics.PodResourcesEndpointRequestsGetCount.WithLabelValues("v1").Inc()

	if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesGet) {
		metrics.PodResourcesEndpointErrorsGetCount.WithLabelValues("v1").Inc()
		return nil, fmt.Errorf("PodResources API Get method disabled")
	}

	pod, exist := p.podsProvider.GetPodByName(req.PodNamespace, req.PodName)
	if !exist {
		metrics.PodResourcesEndpointErrorsGetCount.WithLabelValues("v1").Inc()
		return nil, fmt.Errorf("pod %s in namespace %s not found", req.PodName, req.PodNamespace)
	}

	podResources := &podresourcesv1.PodResources{
		Name:       pod.Name,
		Namespace:  pod.Namespace,
		Containers: make([]*podresourcesv1.ContainerResources, 0, len(pod.Spec.Containers)),
	}

	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SidecarContainers) {
		podResources.Containers = make([]*podresourcesv1.ContainerResources, 0, len(pod.Spec.InitContainers)+len(pod.Spec.Containers))

		for _, container := range pod.Spec.InitContainers {
			if !podutil.IsRestartableInitContainer(&container) {
				continue
			}

			podResources.Containers = append(podResources.Containers, p.getContainerResources(pod, &container))
		}
	}

	for _, container := range pod.Spec.Containers {
		podResources.Containers = append(podResources.Containers, p.getContainerResources(pod, &container))
	}

	response := &podresourcesv1.GetPodResourcesResponse{
		PodResources: podResources,
	}
	return response, nil
}

func (p *v1PodResourcesServer) getContainerResources(pod *v1.Pod, container *v1.Container) *podresourcesv1.ContainerResources {
	containerResources := &podresourcesv1.ContainerResources{
		Name:    container.Name,
		Devices: p.devicesProvider.GetDevices(string(pod.UID), container.Name),
		CpuIds:  p.cpusProvider.GetCPUs(string(pod.UID), container.Name),
		Memory:  p.memoryProvider.GetMemory(string(pod.UID), container.Name),
	}
	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesDynamicResources) {
		containerResources.DynamicResources = p.dynamicResourcesProvider.GetDynamicResources(pod, container)
	}

	return containerResources
}
82
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1alpha1.go
generated
vendored
Normal file
@ -0,0 +1,82 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podresources

import (
	"context"

	"k8s.io/kubernetes/pkg/kubelet/metrics"

	"k8s.io/kubelet/pkg/apis/podresources/v1"
	"k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
)

// v1alpha1PodResourcesServer implements PodResourcesListerServer
type v1alpha1PodResourcesServer struct {
	podsProvider    PodsProvider
	devicesProvider DevicesProvider
}

// NewV1alpha1PodResourcesServer returns a PodResourcesListerServer which lists pods provided by the PodsProvider
// with device information provided by the DevicesProvider
func NewV1alpha1PodResourcesServer(providers PodResourcesProviders) v1alpha1.PodResourcesListerServer {
	return &v1alpha1PodResourcesServer{
		podsProvider:    providers.Pods,
		devicesProvider: providers.Devices,
	}
}

func v1DevicesToAlphaV1(alphaDevs []*v1.ContainerDevices) []*v1alpha1.ContainerDevices {
	var devs []*v1alpha1.ContainerDevices
	for _, alphaDev := range alphaDevs {
		dev := v1alpha1.ContainerDevices{
			ResourceName: alphaDev.ResourceName,
			DeviceIds:    alphaDev.DeviceIds,
		}
		devs = append(devs, &dev)
	}

	return devs
}

// List returns information about the resources assigned to pods on the node
func (p *v1alpha1PodResourcesServer) List(ctx context.Context, req *v1alpha1.ListPodResourcesRequest) (*v1alpha1.ListPodResourcesResponse, error) {
	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1alpha1").Inc()
	pods := p.podsProvider.GetPods()
	podResources := make([]*v1alpha1.PodResources, len(pods))
	p.devicesProvider.UpdateAllocatedDevices()

	for i, pod := range pods {
		pRes := v1alpha1.PodResources{
			Name:       pod.Name,
			Namespace:  pod.Namespace,
			Containers: make([]*v1alpha1.ContainerResources, len(pod.Spec.Containers)),
		}

		for j, container := range pod.Spec.Containers {
			pRes.Containers[j] = &v1alpha1.ContainerResources{
				Name:    container.Name,
				Devices: v1DevicesToAlphaV1(p.devicesProvider.GetDevices(string(pod.UID), container.Name)),
			}
		}
		podResources[i] = &pRes
	}

	return &v1alpha1.ListPodResourcesResponse{
		PodResources: podResources,
	}, nil
}
67
vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/types.go
generated
vendored
Normal file
@ -0,0 +1,67 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

//go:generate mockery
package podresources

import (
	v1 "k8s.io/api/core/v1"
	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
)

// DevicesProvider knows how to provide the devices used by the given container
type DevicesProvider interface {
	// UpdateAllocatedDevices frees any Devices that are bound to terminated pods.
	UpdateAllocatedDevices()
	// GetDevices returns information about the devices assigned to pods and containers
	GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices
	// GetAllocatableDevices returns information about all the devices known to the manager
	GetAllocatableDevices() []*podresourcesapi.ContainerDevices
}

// PodsProvider knows how to provide the pods admitted by the node
type PodsProvider interface {
	GetPods() []*v1.Pod
	GetPodByName(namespace, name string) (*v1.Pod, bool)
}

// CPUsProvider knows how to provide the cpus used by the given container
type CPUsProvider interface {
	// GetCPUs returns information about the cpus assigned to pods and containers
	GetCPUs(podUID, containerName string) []int64
	// GetAllocatableCPUs returns the allocatable (not allocated) CPUs
	GetAllocatableCPUs() []int64
}

type MemoryProvider interface {
	// GetMemory returns information about the memory assigned to containers
	GetMemory(podUID, containerName string) []*podresourcesapi.ContainerMemory
	// GetAllocatableMemory returns the allocatable memory from the node
	GetAllocatableMemory() []*podresourcesapi.ContainerMemory
}

type DynamicResourcesProvider interface {
	// GetDynamicResources returns information about dynamic resources assigned to pods and containers
	GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource
}

type PodResourcesProviders struct {
	Pods             PodsProvider
	Devices          DevicesProvider
	Cpus             CPUsProvider
	Memory           MemoryProvider
	DynamicResources DynamicResourcesProvider
}
10
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/.mockery.yaml
generated
vendored
Normal file
@ -0,0 +1,10 @@
---
dir: testing
filename: cadvisor_mock.go
boilerplate-file: ../../../hack/boilerplate/boilerplate.generatego.txt
outpkg: testing
with-expecter: true
packages:
  k8s.io/kubernetes/pkg/kubelet/cadvisor:
    interfaces:
      Interface:
179
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/cadvisor_linux.go
generated
vendored
Normal file
@ -0,0 +1,179 @@
//go:build linux
// +build linux

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"os"
	"path"
	"time"

	// Register supported container handlers.
	_ "github.com/google/cadvisor/container/containerd/install"
	_ "github.com/google/cadvisor/container/crio/install"
	_ "github.com/google/cadvisor/container/systemd/install"

	"github.com/google/cadvisor/cache/memory"
	cadvisormetrics "github.com/google/cadvisor/container"
	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/manager"
	"github.com/google/cadvisor/utils/sysfs"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"
)

type cadvisorClient struct {
	imageFsInfoProvider ImageFsInfoProvider
	rootPath            string
	manager.Manager
}

var _ Interface = new(cadvisorClient)

// TODO(vmarmol): Make configurable.
// The amount of time for which to keep stats in memory.
const statsCacheDuration = 2 * time.Minute
const maxHousekeepingInterval = 15 * time.Second
const defaultHousekeepingInterval = 10 * time.Second
const allowDynamicHousekeeping = true

func init() {
	// Override cAdvisor flag defaults.
	flagOverrides := map[string]string{
		// Override the default cAdvisor housekeeping interval.
		"housekeeping_interval": defaultHousekeepingInterval.String(),
		// Disable event storage by default.
		"event_storage_event_limit": "default=0",
		"event_storage_age_limit":   "default=0",
	}
	for name, defaultValue := range flagOverrides {
		if f := flag.Lookup(name); f != nil {
			f.DefValue = defaultValue
			f.Value.Set(defaultValue)
		} else {
			ctx := context.Background()
			klog.FromContext(ctx).Error(nil, "Expected cAdvisor flag not found", "flag", name)
		}
	}
}

// New creates a new cAdvisor Interface for linux systems.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
	sysFs := sysfs.NewRealSysFs()

	includedMetrics := cadvisormetrics.MetricSet{
		cadvisormetrics.CpuUsageMetrics:     struct{}{},
		cadvisormetrics.MemoryUsageMetrics:  struct{}{},
		cadvisormetrics.CpuLoadMetrics:      struct{}{},
		cadvisormetrics.DiskIOMetrics:       struct{}{},
		cadvisormetrics.NetworkUsageMetrics: struct{}{},
		cadvisormetrics.AppMetrics:          struct{}{},
		cadvisormetrics.ProcessMetrics:      struct{}{},
		cadvisormetrics.OOMMetrics:          struct{}{},
	}

	if usingLegacyStats || localStorageCapacityIsolation {
		includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
	}

	duration := maxHousekeepingInterval
	housekeepingConfig := manager.HousekeepingConfig{
		Interval:     &duration,
		AllowDynamic: ptr.To(allowDynamicHousekeeping),
	}

	// Create the cAdvisor container manager.
	m, err := manager.New(memory.New(statsCacheDuration, nil), sysFs, housekeepingConfig, includedMetrics, http.DefaultClient, cgroupRoots, nil /* containerEnvMetadataWhiteList */, "" /* perfEventsFile */, time.Duration(0) /*resctrlInterval*/)
	if err != nil {
		return nil, err
	}

	if _, err := os.Stat(rootPath); err != nil {
		if os.IsNotExist(err) {
			if err := os.MkdirAll(path.Clean(rootPath), 0750); err != nil {
				return nil, fmt.Errorf("error creating root directory %q: %v", rootPath, err)
			}
		} else {
			return nil, fmt.Errorf("failed to Stat %q: %v", rootPath, err)
		}
	}

	return &cadvisorClient{
		imageFsInfoProvider: imageFsInfoProvider,
		rootPath:            rootPath,
		Manager:             m,
	}, nil
}

func (cc *cadvisorClient) Start() error {
	return cc.Manager.Start()
}

func (cc *cadvisorClient) ContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
	return cc.GetContainerInfoV2(name, options)
}

func (cc *cadvisorClient) VersionInfo() (*cadvisorapi.VersionInfo, error) {
	return cc.GetVersionInfo()
}

func (cc *cadvisorClient) MachineInfo() (*cadvisorapi.MachineInfo, error) {
	return cc.GetMachineInfo()
}

func (cc *cadvisorClient) ImagesFsInfo(ctx context.Context) (cadvisorapiv2.FsInfo, error) {
	label, err := cc.imageFsInfoProvider.ImageFsInfoLabel()
	if err != nil {
		return cadvisorapiv2.FsInfo{}, err
	}
	return cc.getFsInfo(ctx, label)
}

func (cc *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
	return cc.GetDirFsInfo(cc.rootPath)
}

func (cc *cadvisorClient) getFsInfo(ctx context.Context, label string) (cadvisorapiv2.FsInfo, error) {
	res, err := cc.GetFsInfo(label)
	if err != nil {
		return cadvisorapiv2.FsInfo{}, err
	}
	if len(res) == 0 {
		return cadvisorapiv2.FsInfo{}, fmt.Errorf("failed to find information for the filesystem labeled %q", label)
	}
	// TODO(vmarmol): Handle this better when a label has more than one image filesystem.
	if len(res) > 1 {
		klog.FromContext(ctx).Info("More than one filesystem labeled. Only using the first one", "label", label, "fileSystem", res)
	}

	return res[0], nil
}

func (cc *cadvisorClient) ContainerFsInfo(ctx context.Context) (cadvisorapiv2.FsInfo, error) {
	label, err := cc.imageFsInfoProvider.ContainerFsInfoLabel()
	if err != nil {
		return cadvisorapiv2.FsInfo{}, err
	}
	return cc.getFsInfo(ctx, label)
}
76
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/cadvisor_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,76 @@
//go:build !linux && !windows
// +build !linux,!windows

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import (
	"context"
	"errors"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
)

type cadvisorUnsupported struct {
}

var _ Interface = new(cadvisorUnsupported)

// New creates a new cAdvisor Interface for unsupported systems.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
	return &cadvisorUnsupported{}, nil
}

var errUnsupported = errors.New("cAdvisor is unsupported in this build")

func (cu *cadvisorUnsupported) Start() error {
	return errUnsupported
}

func (cu *cadvisorUnsupported) ContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
	return nil, errUnsupported
}

func (cu *cadvisorUnsupported) GetRequestedContainersInfo(containerName string, options cadvisorapiv2.RequestOptions) (map[string]*cadvisorapi.ContainerInfo, error) {
	return nil, errUnsupported
}

func (cu *cadvisorUnsupported) MachineInfo() (*cadvisorapi.MachineInfo, error) {
	return nil, errUnsupported
}

func (cu *cadvisorUnsupported) VersionInfo() (*cadvisorapi.VersionInfo, error) {
	return nil, errUnsupported
}

func (cu *cadvisorUnsupported) ImagesFsInfo(context.Context) (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, errUnsupported
}

func (cu *cadvisorUnsupported) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, errUnsupported
}

func (cu *cadvisorUnsupported) ContainerFsInfo(context.Context) (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, errUnsupported
}

func (cu *cadvisorUnsupported) GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, nil
}
81
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/cadvisor_windows.go
generated
vendored
Normal file
@ -0,0 +1,81 @@
//go:build windows
// +build windows

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import (
	"context"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"k8s.io/kubernetes/pkg/kubelet/winstats"
)

type cadvisorClient struct {
	rootPath       string
	winStatsClient winstats.Client
}

var _ Interface = new(cadvisorClient)

// New creates a cAdvisor and exports its API on the specified port if port > 0.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
	client, err := winstats.NewPerfCounterClient()
	return &cadvisorClient{
		rootPath:       rootPath,
		winStatsClient: client,
	}, err
}

func (cu *cadvisorClient) Start() error {
	return nil
}

// ContainerInfoV2 is only expected to be used for the root container. Returns info for all containers in the node.
func (cu *cadvisorClient) ContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
	return cu.winStatsClient.WinContainerInfos()
}

func (cu *cadvisorClient) GetRequestedContainersInfo(containerName string, options cadvisorapiv2.RequestOptions) (map[string]*cadvisorapi.ContainerInfo, error) {
	return nil, nil
}

func (cu *cadvisorClient) MachineInfo() (*cadvisorapi.MachineInfo, error) {
	return cu.winStatsClient.WinMachineInfo()
}

func (cu *cadvisorClient) VersionInfo() (*cadvisorapi.VersionInfo, error) {
	return cu.winStatsClient.WinVersionInfo()
}

func (cu *cadvisorClient) ImagesFsInfo(context.Context) (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, nil
}

func (cu *cadvisorClient) ContainerFsInfo(context.Context) (cadvisorapiv2.FsInfo, error) {
	return cadvisorapiv2.FsInfo{}, nil
}

func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
	return cu.GetDirFsInfo(cu.rootPath)
}

func (cu *cadvisorClient) GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error) {
	return cu.winStatsClient.GetDirFsInfo(path)
}
18
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/doc.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package cadvisor provides an interface for Kubelet interactions with cAdvisor.
package cadvisor // import "k8s.io/kubernetes/pkg/kubelet/cadvisor"
63
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/helpers_linux.go
generated
vendored
Normal file
@ -0,0 +1,63 @@
//go:build linux
// +build linux

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import (
	"fmt"
	"strings"

	cadvisorfs "github.com/google/cadvisor/fs"
)

// imageFsInfoProvider knows how to translate the configured runtime
// to its file system label for images.
type imageFsInfoProvider struct {
	runtimeEndpoint string
}

// ImageFsInfoLabel returns the image fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ImageFsInfoLabel() (string, error) {
	if detectCrioWorkaround(i) {
		return cadvisorfs.LabelCrioImages, nil
	}
	return "", fmt.Errorf("no imagefs label for configured runtime")
}

// ContainerFsInfoLabel returns the container fs label for the configured runtime.
// For remote runtimes, it handles addition runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
	if detectCrioWorkaround(i) {
		return cadvisorfs.LabelCrioContainers, nil
	}
	return "", fmt.Errorf("no containerfs label for configured runtime")
}

// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
func detectCrioWorkaround(i *imageFsInfoProvider) bool {
	return strings.HasSuffix(i.runtimeEndpoint, CrioSocketSuffix)
}

// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
	return &imageFsInfoProvider{runtimeEndpoint: runtimeEndpoint}
}
39
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/helpers_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
//go:build !linux
// +build !linux

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import "errors"

type unsupportedImageFsInfoProvider struct{}

// ImageFsInfoLabel returns the image fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *unsupportedImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
	return "", errors.New("unsupported")
}

func (i *unsupportedImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
	return "", errors.New("unsupported")
}

// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
	return &unsupportedImageFsInfoProvider{}
}
56
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/types.go
generated
vendored
Normal file
@ -0,0 +1,56 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

//go:generate mockery
package cadvisor

import (
	"context"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
)

// Interface is an abstract interface for testability. It abstracts the interface to cAdvisor.
type Interface interface {
	Start() error
	ContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error)
	GetRequestedContainersInfo(containerName string, options cadvisorapiv2.RequestOptions) (map[string]*cadvisorapi.ContainerInfo, error)
	MachineInfo() (*cadvisorapi.MachineInfo, error)

	VersionInfo() (*cadvisorapi.VersionInfo, error)

	// Returns usage information about the filesystem holding container images.
	ImagesFsInfo(context.Context) (cadvisorapiv2.FsInfo, error)

	// Returns usage information about the root filesystem.
	RootFsInfo() (cadvisorapiv2.FsInfo, error)

	// Returns usage information about the writeable layer.
	// KEP 4191 can separate the image filesystem
	ContainerFsInfo(context.Context) (cadvisorapiv2.FsInfo, error)

	// Get filesystem information for the filesystem that contains the given file.
	GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error)
}

// ImageFsInfoProvider informs cAdvisor how to find imagefs for container images.
type ImageFsInfoProvider interface {
	// ImageFsInfoLabel returns the label cAdvisor should use to find the filesystem holding container images.
	ImageFsInfoLabel() (string, error)
	// In split image filesystem this will be different from ImageFsInfoLabel
	ContainerFsInfoLabel() (string, error)
}
85
vendor/k8s.io/kubernetes/pkg/kubelet/cadvisor/util.go
generated
vendored
Normal file
@ -0,0 +1,85 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cadvisor

import (
	"strings"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapi2 "github.com/google/cadvisor/info/v2"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
)

const (
	// CrioSocketSuffix is the path to the CRI-O socket.
	// Please keep this in sync with the one in:
	// github.com/google/cadvisor/tree/master/container/crio/client.go
	// Note that however we only match on the suffix, as /var/run is often a
	// symlink to /run, so the user can specify either path.
	CrioSocketSuffix = "run/crio/crio.sock"
)

// CapacityFromMachineInfo returns the capacity of the resources from the machine info.
func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
	c := v1.ResourceList{
		v1.ResourceCPU: *resource.NewMilliQuantity(
			int64(info.NumCores*1000),
			resource.DecimalSI),
		v1.ResourceMemory: *resource.NewQuantity(
			int64(info.MemoryCapacity),
			resource.BinarySI),
	}

	// if huge pages are enabled, we report them as a schedulable resource on the node
	for _, hugepagesInfo := range info.HugePages {
		pageSizeBytes := int64(hugepagesInfo.PageSize * 1024)
		hugePagesBytes := pageSizeBytes * int64(hugepagesInfo.NumPages)
		pageSizeQuantity := resource.NewQuantity(pageSizeBytes, resource.BinarySI)
		c[v1helper.HugePageResourceName(*pageSizeQuantity)] = *resource.NewQuantity(hugePagesBytes, resource.BinarySI)
	}

	return c
}

// EphemeralStorageCapacityFromFsInfo returns the capacity of the ephemeral storage from the FsInfo.
func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
	c := v1.ResourceList{
		v1.ResourceEphemeralStorage: *resource.NewQuantity(
			int64(info.Capacity),
			resource.BinarySI),
	}
	return c
}

// UsingLegacyCadvisorStats returns true if container stats are provided by cadvisor instead of through the CRI.
// CRI integrations should get container metrics via CRI.
// TODO: cri-o relies on cadvisor as a temporary workaround. The code should
// be removed. Related issue:
// https://github.com/kubernetes/kubernetes/issues/51798
func UsingLegacyCadvisorStats(runtimeEndpoint string) bool {
	// If PodAndContainerStatsFromCRI feature is enabled, then assume the user
	// wants to use CRI stats, as the aforementioned workaround isn't needed
	// when this feature is enabled.
	if utilfeature.DefaultFeatureGate.Enabled(features.PodAndContainerStatsFromCRI) {
		return false
	}
	return strings.HasSuffix(runtimeEndpoint, CrioSocketSuffix)
}
25
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/README.md
generated
vendored
Normal file
@ -0,0 +1,25 @@
## DISCLAIMER
- The SIG Node community has reached a general consensus, as a best practice, to
avoid introducing any new checkpointing support. We reached this understanding
after struggling with some hard-to-debug issues in production environments
caused by checkpointing.
- Any change to the checkpointed data structure is considered incompatible; a component
should add its own handling if it needs to ensure backward compatibility when reading
old-format checkpoint files.

## Introduction
This folder contains a framework and primitives, the Checkpointing Manager, which is
used by several other kubelet submodules, `dockershim`, `devicemanager`, `pods`
and `cpumanager`, to implement checkpointing at each submodule level. As explained
in the `Disclaimer` section above, think twice before introducing any further
checkpointing in the kubelet. If checkpointing is still required, this folder
provides the common APIs and the framework for implementing it. Using the same APIs
across all submodules helps maintain consistency at the kubelet level.

Below is the history of checkpointing support in the kubelet.

| Package | First checkpointing support merged on | PR link |
| ------- | ------------------------------------- | ------- |
| kubelet/dockershim | Feb 3, 2017 | [[CRI] Implement Dockershim Checkpoint](https://github.com/kubernetes/kubernetes/pull/39903) |
| devicemanager | Sep 6, 2017 | [Deviceplugin checkpoint](https://github.com/kubernetes/kubernetes/pull/51744) |
| kubelet/pod | Nov 22, 2017 | [Initial basic bootstrap-checkpoint support](https://github.com/kubernetes/kubernetes/pull/50984) |
| cpumanager | Oct 27, 2017 | [Add file backed state to cpu manager](https://github.com/kubernetes/kubernetes/pull/54408) |
110
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checkpoint_manager.go
generated
vendored
Normal file
110
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checkpoint_manager.go
generated
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checkpointmanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
)
|
||||
|
||||
// Checkpoint provides the process checkpoint data
|
||||
type Checkpoint interface {
|
||||
MarshalCheckpoint() ([]byte, error)
|
||||
UnmarshalCheckpoint(blob []byte) error
|
||||
VerifyChecksum() error
|
||||
}
|
||||
|
||||
// CheckpointManager provides the interface to manage checkpoints
|
||||
type CheckpointManager interface {
|
||||
// CreateCheckpoint persists checkpoint in CheckpointStore. checkpointKey is the key for utilstore to locate checkpoint.
|
||||
// For file backed utilstore, checkpointKey is the file name to write the checkpoint data.
|
||||
CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error
|
||||
// GetCheckpoint retrieves checkpoint from CheckpointStore.
|
||||
GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error
|
||||
// WARNING: RemoveCheckpoint will not return error if checkpoint does not exist.
|
||||
RemoveCheckpoint(checkpointKey string) error
|
||||
// ListCheckpoints returns the list of existing checkpoints.
|
||||
ListCheckpoints() ([]string, error)
|
||||
}
|
||||
|
||||
// impl is an implementation of CheckpointManager. It persists checkpoint in CheckpointStore
|
||||
type impl struct {
|
||||
path string
|
||||
store utilstore.Store
|
||||
mutex sync.Mutex
|
||||
}
|
||||
|
||||
// NewCheckpointManager returns a new instance of a checkpoint manager
|
||||
func NewCheckpointManager(checkpointDir string) (CheckpointManager, error) {
|
||||
fstore, err := utilstore.NewFileStore(checkpointDir, &utilfs.DefaultFs{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &impl{path: checkpointDir, store: fstore}, nil
|
||||
}
|
||||
|
||||
// CreateCheckpoint persists checkpoint in CheckpointStore.
|
||||
func (manager *impl) CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
blob, err := checkpoint.MarshalCheckpoint()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return manager.store.Write(checkpointKey, blob)
|
||||
}
|
||||
|
||||
// GetCheckpoint retrieves checkpoint from CheckpointStore.
|
||||
func (manager *impl) GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
blob, err := manager.store.Read(checkpointKey)
|
||||
if err != nil {
|
||||
if err == utilstore.ErrKeyNotFound {
|
||||
return errors.ErrCheckpointNotFound
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = checkpoint.UnmarshalCheckpoint(blob)
|
||||
if err == nil {
|
||||
err = checkpoint.VerifyChecksum()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemoveCheckpoint will not return error if checkpoint does not exist.
|
||||
func (manager *impl) RemoveCheckpoint(checkpointKey string) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
return manager.store.Delete(checkpointKey)
|
||||
}
|
||||
|
||||
// ListCheckpoints returns the list of existing checkpoints.
|
||||
func (manager *impl) ListCheckpoints() ([]string, error) {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
keys, err := manager.store.List()
|
||||
if err != nil {
|
||||
return []string{}, fmt.Errorf("failed to list checkpoint store: %v", err)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
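For illustration, a minimal sketch of how a submodule might consume this API; the exampleCheckpoint type, the key name and the directory path below are hypothetical, and error handling is abbreviated:

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
)

// exampleCheckpoint is a hypothetical Checkpoint implementation that stores a
// small JSON payload. Real implementations usually also embed a checksum.
type exampleCheckpoint struct {
	Data map[string]string `json:"data"`
}

func (cp *exampleCheckpoint) MarshalCheckpoint() ([]byte, error)    { return json.Marshal(cp) }
func (cp *exampleCheckpoint) UnmarshalCheckpoint(blob []byte) error { return json.Unmarshal(blob, cp) }
func (cp *exampleCheckpoint) VerifyChecksum() error                 { return nil } // no checksum in this sketch

func main() {
	// Checkpoints are persisted as files under the supplied directory.
	manager, err := checkpointmanager.NewCheckpointManager("/var/lib/kubelet/example-checkpoints")
	if err != nil {
		panic(err)
	}

	// Persist a checkpoint under a key (for the file-backed store, the file name).
	_ = manager.CreateCheckpoint("example_state", &exampleCheckpoint{Data: map[string]string{"k": "v"}})

	// Read it back into a fresh value; ErrCheckpointNotFound is returned when the key is missing.
	restored := &exampleCheckpoint{}
	if err := manager.GetCheckpoint("example_state", restored); err != nil {
		fmt.Println("no checkpoint restored:", err)
	}
}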
48
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum/checksum.go
generated
vendored
Normal file
48
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum/checksum.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checksum
|
||||
|
||||
import (
|
||||
"hash/fnv"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||
)
|
||||
|
||||
// Checksum is the data to be stored as checkpoint
|
||||
type Checksum uint64
|
||||
|
||||
// Verify verifies that the passed checksum matches the checksum calculated from the data
|
||||
func (cs Checksum) Verify(data interface{}) error {
|
||||
actualCS := New(data)
|
||||
if cs != actualCS {
|
||||
return &errors.CorruptCheckpointError{ActualCS: uint64(actualCS), ExpectedCS: uint64(cs)}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// New returns the Checksum of checkpoint data
|
||||
func New(data interface{}) Checksum {
|
||||
return Checksum(getChecksum(data))
|
||||
}
|
||||
|
||||
// getChecksum returns the calculated checksum of the checkpoint data
|
||||
func getChecksum(data interface{}) uint64 {
|
||||
hash := fnv.New32a()
|
||||
hashutil.DeepHashObject(hash, data)
|
||||
return uint64(hash.Sum32())
|
||||
}
|
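As a sketch of how this checksum is typically wired into a Checkpoint implementation (the hypotheticalCheckpoint type and its fields are illustrative):

package example

import (
	"encoding/json"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)

// hypotheticalCheckpoint stores its own checksum next to the payload so that
// VerifyChecksum can detect corruption when the checkpoint is read back.
type hypotheticalCheckpoint struct {
	Entries  []string          `json:"entries"`
	Checksum checksum.Checksum `json:"checksum"`
}

func (cp *hypotheticalCheckpoint) MarshalCheckpoint() ([]byte, error) {
	// Compute the checksum over the payload before serializing.
	cp.Checksum = checksum.New(cp.Entries)
	return json.Marshal(cp)
}

func (cp *hypotheticalCheckpoint) UnmarshalCheckpoint(blob []byte) error {
	return json.Unmarshal(blob, cp)
}

func (cp *hypotheticalCheckpoint) VerifyChecksum() error {
	// Returns a *CorruptCheckpointError when the stored checksum does not match.
	return cp.Checksum.Verify(cp.Entries)
}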
45
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors/errors.go
generated
vendored
Normal file
45
vendor/k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors/errors.go
generated
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package errors
|
||||
|
||||
import "fmt"
|
||||
|
||||
// CorruptCheckpointError is reported when the checksum does not match.
|
||||
// Check for it with:
|
||||
//
|
||||
// var csErr *CorruptCheckpointError
|
||||
// if errors.As(err, &csErr) { ... }
|
||||
// if errors.Is(err, CorruptCheckpointError{}) { ... }
|
||||
type CorruptCheckpointError struct {
|
||||
ActualCS, ExpectedCS uint64
|
||||
}
|
||||
|
||||
func (err CorruptCheckpointError) Error() string {
|
||||
return "checkpoint is corrupted"
|
||||
}
|
||||
|
||||
func (err CorruptCheckpointError) Is(target error) bool {
|
||||
switch target.(type) {
|
||||
case *CorruptCheckpointError, CorruptCheckpointError:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// ErrCheckpointNotFound is reported when checkpoint is not found for a given key
|
||||
var ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")
|
11
vendor/k8s.io/kubernetes/pkg/kubelet/cm/.mockery.yaml
generated
vendored
Normal file
11
vendor/k8s.io/kubernetes/pkg/kubelet/cm/.mockery.yaml
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
---
|
||||
dir: testing
|
||||
filename: "mock_{{.InterfaceName | snakecase}}.go"
|
||||
boilerplate-file: ../../../hack/boilerplate/boilerplate.generatego.txt
|
||||
outpkg: testing
|
||||
with-expecter: true
|
||||
packages:
|
||||
k8s.io/kubernetes/pkg/kubelet/cm:
|
||||
interfaces:
|
||||
ContainerManager:
|
||||
PodContainerManager:
|
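# With this configuration, running mockery (e.g. via the //go:generate mockery
# directive in container_manager.go) regenerates expecter-style mocks for the
# interfaces listed above into the ./testing package.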
13
vendor/k8s.io/kubernetes/pkg/kubelet/cm/OWNERS
generated
vendored
Normal file
13
vendor/k8s.io/kubernetes/pkg/kubelet/cm/OWNERS
generated
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- Random-Liu
|
||||
- dchen1107
|
||||
- derekwaynecarr
|
||||
- yujuhong
|
||||
- klueska
|
||||
reviewers:
|
||||
- sig-node-reviewers
|
||||
emeritus_approvers:
|
||||
- ConnorDoyle
|
||||
- vishh
|
62
vendor/k8s.io/kubernetes/pkg/kubelet/cm/admission/errors.go
generated
vendored
Normal file
62
vendor/k8s.io/kubernetes/pkg/kubelet/cm/admission/errors.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package admission
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
)
|
||||
|
||||
const (
|
||||
ErrorReasonUnexpected = "UnexpectedAdmissionError"
|
||||
)
|
||||
|
||||
type Error interface {
|
||||
Error() string
|
||||
Type() string
|
||||
}
|
||||
|
||||
type unexpectedAdmissionError struct{ Err error }
|
||||
|
||||
var _ Error = (*unexpectedAdmissionError)(nil)
|
||||
|
||||
func (e *unexpectedAdmissionError) Error() string {
|
||||
return fmt.Sprintf("Allocate failed due to %v, which is unexpected", e.Err)
|
||||
}
|
||||
|
||||
func (e *unexpectedAdmissionError) Type() string {
|
||||
return ErrorReasonUnexpected
|
||||
}
|
||||
|
||||
func GetPodAdmitResult(err error) lifecycle.PodAdmitResult {
|
||||
if err == nil {
|
||||
return lifecycle.PodAdmitResult{Admit: true}
|
||||
}
|
||||
|
||||
var admissionErr Error
|
||||
if !errors.As(err, &admissionErr) {
|
||||
admissionErr = &unexpectedAdmissionError{err}
|
||||
}
|
||||
|
||||
return lifecycle.PodAdmitResult{
|
||||
Message: admissionErr.Error(),
|
||||
Reason: admissionErr.Type(),
|
||||
Admit: false,
|
||||
}
|
||||
}
|
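A brief, illustrative sketch of how a caller turns an allocation error into an admission decision with this helper (the error message is made up):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/admission"
)

func main() {
	// A nil error admits the pod.
	ok := admission.GetPodAdmitResult(nil)
	fmt.Println(ok.Admit) // true

	// Any other error is wrapped as an unexpected admission error.
	failed := admission.GetPodAdmitResult(fmt.Errorf("allocation failed"))
	fmt.Println(failed.Admit, failed.Reason) // false UnexpectedAdmissionError
}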
485
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_manager_linux.go
generated
vendored
Normal file
485
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_manager_linux.go
generated
vendored
Normal file
@ -0,0 +1,485 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
libcontainercgroupmanager "github.com/opencontainers/runc/libcontainer/cgroups/manager"
|
||||
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
|
||||
"k8s.io/klog/v2"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
// systemdSuffix is the cgroup name suffix for systemd
|
||||
systemdSuffix string = ".slice"
|
||||
// Cgroup2MemoryMin is memory.min for cgroup v2
|
||||
Cgroup2MemoryMin string = "memory.min"
|
||||
// Cgroup2MemoryHigh is memory.high for cgroup v2
|
||||
Cgroup2MemoryHigh string = "memory.high"
|
||||
Cgroup2MaxCpuLimit string = "max"
|
||||
Cgroup2MaxSwapFilename string = "memory.swap.max"
|
||||
)
|
||||
|
||||
var RootCgroupName = CgroupName([]string{})
|
||||
|
||||
// NewCgroupName composes a new cgroup name.
|
||||
// Use RootCgroupName as base to start at the root.
|
||||
// This function does some basic checks for invalid characters in the name.
|
||||
func NewCgroupName(base CgroupName, components ...string) CgroupName {
|
||||
for _, component := range components {
|
||||
// Forbid using "_" in internal names. When remapping internal
|
||||
// names to systemd cgroup driver, we want to remap "-" => "_",
|
||||
// so we forbid "_" so that we can always reverse the mapping.
|
||||
if strings.Contains(component, "/") || strings.Contains(component, "_") {
|
||||
panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component))
|
||||
}
|
||||
}
|
||||
return CgroupName(append(append([]string{}, base...), components...))
|
||||
}
|
||||
|
||||
func escapeSystemdCgroupName(part string) string {
|
||||
return strings.Replace(part, "-", "_", -1)
|
||||
}
|
||||
|
||||
func unescapeSystemdCgroupName(part string) string {
|
||||
return strings.Replace(part, "_", "-", -1)
|
||||
}
|
||||
|
||||
// cgroupName.ToSystemd converts the internal cgroup name to a systemd name.
|
||||
// For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
|
||||
// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
|
||||
// This function always expands the systemd name into the cgroupfs form. If only
|
||||
// the last part is needed, use path.Base(...) on it to discard the rest.
|
||||
func (cgroupName CgroupName) ToSystemd() string {
|
||||
if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") {
|
||||
return "/"
|
||||
}
|
||||
newparts := []string{}
|
||||
for _, part := range cgroupName {
|
||||
part = escapeSystemdCgroupName(part)
|
||||
newparts = append(newparts, part)
|
||||
}
|
||||
|
||||
result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix)
|
||||
if err != nil {
|
||||
// Should never happen...
|
||||
panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func ParseSystemdToCgroupName(name string) CgroupName {
|
||||
driverName := path.Base(name)
|
||||
driverName = strings.TrimSuffix(driverName, systemdSuffix)
|
||||
parts := strings.Split(driverName, "-")
|
||||
result := []string{}
|
||||
for _, part := range parts {
|
||||
result = append(result, unescapeSystemdCgroupName(part))
|
||||
}
|
||||
return CgroupName(result)
|
||||
}
|
||||
|
||||
func (cgroupName CgroupName) ToCgroupfs() string {
|
||||
return "/" + path.Join(cgroupName...)
|
||||
}
|
||||
|
||||
func ParseCgroupfsToCgroupName(name string) CgroupName {
|
||||
components := strings.Split(strings.TrimPrefix(name, "/"), "/")
|
||||
if len(components) == 1 && components[0] == "" {
|
||||
components = []string{}
|
||||
}
|
||||
return CgroupName(components)
|
||||
}
|
||||
|
||||
func IsSystemdStyleName(name string) bool {
|
||||
return strings.HasSuffix(name, systemdSuffix)
|
||||
}
|
||||
|
||||
// CgroupSubsystems holds information about the mounted cgroup subsystems
|
||||
type CgroupSubsystems struct {
|
||||
// Cgroup subsystem mounts.
|
||||
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
|
||||
Mounts []libcontainercgroups.Mount
|
||||
|
||||
// Cgroup subsystems mapped to their mount locations.
|
||||
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
|
||||
MountPoints map[string]string
|
||||
}
|
||||
|
||||
// cgroupCommon implements common tasks
|
||||
// that are valid for both cgroup v1 and v2.
|
||||
// This prevents duplicating the code between
|
||||
// v1 and v2 specific implementations.
|
||||
type cgroupCommon struct {
|
||||
// subsystems holds information about all the
|
||||
// mounted cgroup subsystems on the node
|
||||
subsystems *CgroupSubsystems
|
||||
|
||||
// useSystemd tells if systemd cgroup manager should be used.
|
||||
useSystemd bool
|
||||
}
|
||||
|
||||
// Make sure that cgroupV1impl and cgroupV2impl implement the CgroupManager interface
|
||||
var _ CgroupManager = &cgroupV1impl{}
|
||||
var _ CgroupManager = &cgroupV2impl{}
|
||||
|
||||
// NewCgroupManager is a factory method that returns a CgroupManager
|
||||
func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager {
|
||||
if libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
return NewCgroupV2Manager(cs, cgroupDriver)
|
||||
}
|
||||
return NewCgroupV1Manager(cs, cgroupDriver)
|
||||
}
|
||||
|
||||
func newCgroupCommon(cs *CgroupSubsystems, cgroupDriver string) cgroupCommon {
|
||||
return cgroupCommon{
|
||||
subsystems: cs,
|
||||
useSystemd: cgroupDriver == "systemd",
|
||||
}
|
||||
}
|
||||
|
||||
// Name converts the cgroup to the driver specific value in cgroupfs form.
|
||||
// This always returns a valid cgroupfs path even when systemd driver is in use!
|
||||
func (m *cgroupCommon) Name(name CgroupName) string {
|
||||
if m.useSystemd {
|
||||
return name.ToSystemd()
|
||||
}
|
||||
return name.ToCgroupfs()
|
||||
}
|
||||
|
||||
// CgroupName converts the literal cgroupfs name on the host to an internal identifier.
|
||||
func (m *cgroupCommon) CgroupName(name string) CgroupName {
|
||||
if m.useSystemd {
|
||||
return ParseSystemdToCgroupName(name)
|
||||
}
|
||||
return ParseCgroupfsToCgroupName(name)
|
||||
}
|
||||
|
||||
// buildCgroupPaths builds a path to each cgroup subsystem for the specified name.
|
||||
func (m *cgroupCommon) buildCgroupPaths(name CgroupName) map[string]string {
|
||||
cgroupFsAdaptedName := m.Name(name)
|
||||
cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints))
|
||||
for key, val := range m.subsystems.MountPoints {
|
||||
cgroupPaths[key] = path.Join(val, cgroupFsAdaptedName)
|
||||
}
|
||||
return cgroupPaths
|
||||
}
|
||||
|
||||
// libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config.
|
||||
func (m *cgroupCommon) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup {
|
||||
config := &libcontainerconfigs.Cgroup{
|
||||
Systemd: m.useSystemd,
|
||||
}
|
||||
if needResources {
|
||||
config.Resources = m.toResources(in.ResourceParameters)
|
||||
} else {
|
||||
config.Resources = &libcontainerconfigs.Resources{}
|
||||
}
|
||||
|
||||
if !config.Systemd {
|
||||
// For fs cgroup manager, we can either set Path or Name and Parent.
|
||||
// Setting Path is easier.
|
||||
config.Path = in.Name.ToCgroupfs()
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
// For systemd, we have to set Name and Parent, as they are needed to talk to systemd.
|
||||
// Setting Path is optional as it can be deduced from Name and Parent.
|
||||
|
||||
// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
|
||||
// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
|
||||
// and split it appropriately, using essentially the logic below.
|
||||
// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
|
||||
// for systemd was never introduced.
|
||||
dir, base := path.Split(in.Name.ToSystemd())
|
||||
if dir == "/" {
|
||||
dir = "-.slice"
|
||||
} else {
|
||||
dir = path.Base(dir)
|
||||
}
|
||||
config.Parent = dir
|
||||
config.Name = base
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
// Destroy destroys the specified cgroup
|
||||
func (m *cgroupCommon) Destroy(cgroupConfig *CgroupConfig) error {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start))
|
||||
}()
|
||||
|
||||
libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, false)
|
||||
manager, err := libcontainercgroupmanager.New(libcontainerCgroupConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Delete cgroups using libcontainers Managers Destroy() method
|
||||
if err = manager.Destroy(); err != nil {
|
||||
return fmt.Errorf("unable to destroy cgroup paths for cgroup %v : %v", cgroupConfig.Name, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *cgroupCommon) SetCgroupConfig(name CgroupName, resourceConfig *ResourceConfig) error {
|
||||
containerConfig := &CgroupConfig{
|
||||
Name: name,
|
||||
ResourceParameters: resourceConfig,
|
||||
}
|
||||
|
||||
return m.Update(containerConfig)
|
||||
}
|
||||
|
||||
// getCPUWeight converts from the range [2, 262144] to [1, 10000]
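// (i.e. from cgroup v1 cpu.shares to cgroup v2 cpu.weight). For example, the
// minimum of 2 shares maps to weight 1, the default of 1024 shares maps to
// weight 39, and 262144 or more shares map to weight 10000.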
|
||||
func getCPUWeight(cpuShares *uint64) uint64 {
|
||||
if cpuShares == nil {
|
||||
return 0
|
||||
}
|
||||
if *cpuShares >= 262144 {
|
||||
return 10000
|
||||
}
|
||||
return 1 + ((*cpuShares-2)*9999)/262142
|
||||
}
|
||||
|
||||
var (
|
||||
availableRootControllersOnce sync.Once
|
||||
availableRootControllers sets.Set[string]
|
||||
)
|
||||
|
||||
func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
|
||||
resources := &libcontainerconfigs.Resources{
|
||||
SkipDevices: true,
|
||||
SkipFreezeOnSet: true,
|
||||
}
|
||||
if resourceConfig == nil {
|
||||
return resources
|
||||
}
|
||||
if resourceConfig.Memory != nil {
|
||||
resources.Memory = *resourceConfig.Memory
|
||||
}
|
||||
if resourceConfig.CPUShares != nil {
|
||||
if libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
resources.CpuWeight = getCPUWeight(resourceConfig.CPUShares)
|
||||
} else {
|
||||
resources.CpuShares = *resourceConfig.CPUShares
|
||||
}
|
||||
}
|
||||
if resourceConfig.CPUQuota != nil {
|
||||
resources.CpuQuota = *resourceConfig.CPUQuota
|
||||
}
|
||||
if resourceConfig.CPUPeriod != nil {
|
||||
resources.CpuPeriod = *resourceConfig.CPUPeriod
|
||||
}
|
||||
if resourceConfig.PidsLimit != nil {
|
||||
resources.PidsLimit = *resourceConfig.PidsLimit
|
||||
}
|
||||
if !resourceConfig.CPUSet.IsEmpty() {
|
||||
resources.CpusetCpus = resourceConfig.CPUSet.String()
|
||||
}
|
||||
|
||||
m.maybeSetHugetlb(resourceConfig, resources)
|
||||
|
||||
// Ideally unified is used for all the resources when running on cgroup v2.
|
||||
// It doesn't make a difference for the memory.max limit, but for e.g. the cpu controller
|
||||
// you can specify the correct setting without relying on the conversions performed by the OCI runtime.
|
||||
if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
resources.Unified = make(map[string]string)
|
||||
for k, v := range resourceConfig.Unified {
|
||||
resources.Unified[k] = v
|
||||
}
|
||||
}
|
||||
return resources
|
||||
}
|
||||
|
||||
func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) {
|
||||
// Check if hugetlb is supported.
|
||||
if libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
if !getSupportedUnifiedControllers().Has("hugetlb") {
|
||||
klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
|
||||
return
|
||||
}
|
||||
} else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok {
|
||||
klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
|
||||
return
|
||||
}
|
||||
|
||||
// For each page size enumerated, set that value.
|
||||
pageSizes := sets.New[string]()
|
||||
for pageSize, limit := range resourceConfig.HugePageLimit {
|
||||
sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize)
|
||||
if err != nil {
|
||||
klog.InfoS("Invalid pageSize", "err", err)
|
||||
continue
|
||||
}
|
||||
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
|
||||
Pagesize: sizeString,
|
||||
Limit: uint64(limit),
|
||||
})
|
||||
pageSizes.Insert(sizeString)
|
||||
}
|
||||
// for each page size omitted, limit to 0
|
||||
for _, pageSize := range libcontainercgroups.HugePageSizes() {
|
||||
if pageSizes.Has(pageSize) {
|
||||
continue
|
||||
}
|
||||
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
|
||||
Pagesize: pageSize,
|
||||
Limit: uint64(0),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Update updates the cgroup with the specified Cgroup Configuration
|
||||
func (m *cgroupCommon) Update(cgroupConfig *CgroupConfig) error {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start))
|
||||
}()
|
||||
|
||||
libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
|
||||
manager, err := libcontainercgroupmanager.New(libcontainerCgroupConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create cgroup manager: %v", err)
|
||||
}
|
||||
return manager.Set(libcontainerCgroupConfig.Resources)
|
||||
}
|
||||
|
||||
// Create creates the specified cgroup
|
||||
func (m *cgroupCommon) Create(cgroupConfig *CgroupConfig) error {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start))
|
||||
}()
|
||||
|
||||
libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
|
||||
manager, err := libcontainercgroupmanager.New(libcontainerCgroupConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Apply(-1) is a hack to create the cgroup directories for each resource
|
||||
// subsystem. The function [cgroups.Manager.apply()] applies cgroup
|
||||
// configuration to the process with the specified pid.
|
||||
// It creates cgroup files for each subsystems and writes the pid
|
||||
// in the tasks file. We use the function to create all the required
|
||||
// cgroup files but not attach any "real" pid to the cgroup.
|
||||
if err := manager.Apply(-1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// It may seem confusing that we call Set after Apply, but runc follows a
// similar pattern; this is needed to ensure the cpu quota is set properly.
|
||||
if err := manager.Set(libcontainerCgroupConfig.Resources); err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("cgroup manager.Set failed: %w", err))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Pids scans through all subsystems to find the pids associated with the specified cgroup.
|
||||
func (m *cgroupCommon) Pids(name CgroupName) []int {
|
||||
// we need the driver specific name
|
||||
cgroupFsName := m.Name(name)
|
||||
|
||||
// Get a list of processes that we need to kill
|
||||
pidsToKill := sets.New[int]()
|
||||
var pids []int
|
||||
for _, val := range m.subsystems.MountPoints {
|
||||
dir := path.Join(val, cgroupFsName)
|
||||
_, err := os.Stat(dir)
|
||||
if os.IsNotExist(err) {
|
||||
// The subsystem pod cgroup is already deleted
|
||||
// do nothing, continue
|
||||
continue
|
||||
}
|
||||
// Get a list of pids that are still charged to the pod's cgroup
|
||||
pids, err = getCgroupProcs(dir)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
pidsToKill.Insert(pids...)
|
||||
|
||||
// WalkFunc which is called for each file and directory in the pod cgroup dir
|
||||
visitor := func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
klog.V(4).InfoS("Cgroup manager encountered error scanning cgroup path", "path", path, "err", err)
|
||||
return filepath.SkipDir
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
pids, err = getCgroupProcs(path)
|
||||
if err != nil {
|
||||
klog.V(4).InfoS("Cgroup manager encountered error getting procs for cgroup path", "path", path, "err", err)
|
||||
return filepath.SkipDir
|
||||
}
|
||||
pidsToKill.Insert(pids...)
|
||||
return nil
|
||||
}
|
||||
// Walk through the pod cgroup directory to check if
|
||||
// container cgroups haven't been GCed yet. Get attached processes to
|
||||
// all such unwanted containers under the pod cgroup
|
||||
if err = filepath.Walk(dir, visitor); err != nil {
|
||||
klog.V(4).InfoS("Cgroup manager encountered error scanning pids for directory", "path", dir, "err", err)
|
||||
}
|
||||
}
|
||||
return sets.List(pidsToKill)
|
||||
}
|
||||
|
||||
// ReduceCPULimits reduces the cgroup's cpu shares to the lowest possible value
|
||||
func (m *cgroupCommon) ReduceCPULimits(cgroupName CgroupName) error {
|
||||
// Set lowest possible CpuShares value for the cgroup
|
||||
minimumCPUShares := uint64(MinShares)
|
||||
resources := &ResourceConfig{
|
||||
CPUShares: &minimumCPUShares,
|
||||
}
|
||||
containerConfig := &CgroupConfig{
|
||||
Name: cgroupName,
|
||||
ResourceParameters: resources,
|
||||
}
|
||||
return m.Update(containerConfig)
|
||||
}
|
||||
|
||||
func readCgroupMemoryConfig(cgroupPath string, memLimitFile string) (*ResourceConfig, error) {
|
||||
memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err)
|
||||
}
|
||||
mLim := int64(memLimit)
|
||||
//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support
|
||||
return &ResourceConfig{Memory: &mLim}, nil
|
||||
|
||||
}
|
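A short sketch of the name conversions defined in this file, reusing the example from the ToSystemd doc comment (the pod UID is illustrative):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm"
)

func main() {
	name := cm.NewCgroupName(cm.RootCgroupName, "kubepods", "burstable", "pod1234-abcd-5678-efgh")

	// cgroupfs driver form: /kubepods/burstable/pod1234-abcd-5678-efgh
	fmt.Println(name.ToCgroupfs())

	// systemd driver form:
	// /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice
	fmt.Println(name.ToSystemd())

	// And back from the systemd representation to the internal name.
	fmt.Println(cm.ParseSystemdToCgroupName(name.ToSystemd()))
}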
120
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_manager_unsupported.go
generated
vendored
Normal file
120
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_manager_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,120 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
type unsupportedCgroupManager struct{}
|
||||
|
||||
var errNotSupported = errors.New("Cgroup Manager is not supported in this build")
|
||||
|
||||
// Make sure that unsupportedCgroupManager implements the CgroupManager interface
|
||||
var _ CgroupManager = &unsupportedCgroupManager{}
|
||||
|
||||
type CgroupSubsystems struct {
|
||||
Mounts []interface{}
|
||||
MountPoints map[string]string
|
||||
}
|
||||
|
||||
func NewCgroupManager(_ interface{}) CgroupManager {
|
||||
return &unsupportedCgroupManager{}
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Version() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Name(_ CgroupName) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Validate(_ CgroupName) error {
|
||||
return errNotSupported
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Exists(_ CgroupName) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Destroy(_ *CgroupConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Update(_ *CgroupConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Create(_ *CgroupConfig) error {
|
||||
return errNotSupported
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) MemoryUsage(_ CgroupName) (int64, error) {
|
||||
return -1, errNotSupported
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) Pids(_ CgroupName) []int {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) CgroupName(name string) CgroupName {
|
||||
return CgroupName([]string{})
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {
|
||||
return nil, errNotSupported
|
||||
}
|
||||
|
||||
func (m *unsupportedCgroupManager) SetCgroupConfig(name CgroupName, resourceConfig *ResourceConfig) error {
|
||||
return errNotSupported
|
||||
}
|
||||
|
||||
var RootCgroupName = CgroupName([]string{})
|
||||
|
||||
func NewCgroupName(base CgroupName, components ...string) CgroupName {
|
||||
return append(append([]string{}, base...), components...)
|
||||
}
|
||||
|
||||
func (cgroupName CgroupName) ToSystemd() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func ParseSystemdToCgroupName(name string) CgroupName {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cgroupName CgroupName) ToCgroupfs() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func ParseCgroupfsToCgroupName(name string) CgroupName {
|
||||
return nil
|
||||
}
|
||||
|
||||
func IsSystemdStyleName(name string) bool {
|
||||
return false
|
||||
}
|
145
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_v1_manager_linux.go
generated
vendored
Normal file
145
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_v1_manager_linux.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
)
|
||||
|
||||
const cgroupv1MemLimitFile string = "memory.limit_in_bytes"
|
||||
|
||||
// cgroupV1impl implements the CgroupManager interface
|
||||
// for cgroup v1.
|
||||
// It's a stateless object which can be used to
|
||||
// update, create or delete any number of cgroups
|
||||
// It relies on runc/libcontainer cgroup managers.
|
||||
type cgroupV1impl struct {
|
||||
cgroupCommon
|
||||
}
|
||||
|
||||
func NewCgroupV1Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager {
|
||||
return &cgroupV1impl{
|
||||
cgroupCommon: newCgroupCommon(cs, cgroupDriver),
|
||||
}
|
||||
}
|
||||
|
||||
// Version of the cgroup implementation on the host
|
||||
func (c *cgroupV1impl) Version() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// Validate checks if all subsystem cgroups are valid
|
||||
func (c *cgroupV1impl) Validate(name CgroupName) error {
|
||||
// Get map of all cgroup paths on the system for the particular cgroup
|
||||
cgroupPaths := c.buildCgroupPaths(name)
|
||||
|
||||
// the presence of alternative control groups not known to runc confuses
|
||||
// the kubelet existence checks.
|
||||
// ideally, we would have a mechanism in runc to support Exists() logic
|
||||
// scoped to the set control groups it understands. this is being discussed
|
||||
// in https://github.com/opencontainers/runc/issues/1440
|
||||
// once resolved, we can remove this code.
|
||||
allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids")
|
||||
|
||||
if _, ok := c.subsystems.MountPoints["hugetlb"]; ok {
|
||||
allowlistControllers.Insert("hugetlb")
|
||||
}
|
||||
var missingPaths []string
|
||||
// If even one cgroup path doesn't exist, then the cgroup doesn't exist.
|
||||
for controller, path := range cgroupPaths {
|
||||
// ignore mounts we don't care about
|
||||
if !allowlistControllers.Has(controller) {
|
||||
continue
|
||||
}
|
||||
if !libcontainercgroups.PathExists(path) {
|
||||
missingPaths = append(missingPaths, path)
|
||||
}
|
||||
}
|
||||
|
||||
if len(missingPaths) > 0 {
|
||||
return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", "))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Exists checks if all subsystem cgroups already exist
|
||||
func (c *cgroupV1impl) Exists(name CgroupName) bool {
|
||||
return c.Validate(name) == nil
|
||||
}
|
||||
|
||||
// MemoryUsage returns the current memory usage of the specified cgroup,
|
||||
// as read from cgroupfs.
|
||||
func (c *cgroupV1impl) MemoryUsage(name CgroupName) (int64, error) {
|
||||
var path, file string
|
||||
mp, ok := c.subsystems.MountPoints["memory"]
|
||||
if !ok { // should not happen
|
||||
return -1, errors.New("no cgroup v1 mountpoint for memory controller found")
|
||||
}
|
||||
path = mp + "/" + c.Name(name)
|
||||
file = "memory.usage_in_bytes"
|
||||
val, err := fscommon.GetCgroupParamUint(path, file)
|
||||
return int64(val), err
|
||||
}
|
||||
|
||||
// GetCgroupConfig returns the resource config values applied to the cgroup for the specified resource type
|
||||
func (c *cgroupV1impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {
|
||||
cgroupPaths := c.buildCgroupPaths(name)
|
||||
cgroupResourcePath, found := cgroupPaths[string(resource)]
|
||||
if !found {
|
||||
return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name)
|
||||
}
|
||||
switch resource {
|
||||
case v1.ResourceCPU:
|
||||
return c.getCgroupCPUConfig(cgroupResourcePath)
|
||||
case v1.ResourceMemory:
|
||||
return c.getCgroupMemoryConfig(cgroupResourcePath)
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name)
|
||||
}
|
||||
|
||||
func (c *cgroupV1impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) {
|
||||
cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us")
|
||||
if errQ != nil {
|
||||
return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %w", cgroupPath, errQ)
|
||||
}
|
||||
cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64)
|
||||
if errInt != nil {
|
||||
return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %w", cgroupPath, errInt)
|
||||
}
|
||||
cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us")
|
||||
if errP != nil {
|
||||
return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %w", cgroupPath, errP)
|
||||
}
|
||||
cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares")
|
||||
if errS != nil {
|
||||
return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %w", cgroupPath, errS)
|
||||
}
|
||||
return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil
|
||||
}
|
||||
|
||||
func (c *cgroupV1impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) {
|
||||
return readCgroupMemoryConfig(cgroupPath, cgroupv1MemLimitFile)
|
||||
}
|
177
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_v2_manager_linux.go
generated
vendored
Normal file
177
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cgroup_v2_manager_linux.go
generated
vendored
Normal file
@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
|
||||
)
|
||||
|
||||
const cgroupv2MemLimitFile string = "memory.max"
|
||||
|
||||
// cgroupV2impl implements the CgroupManager interface
|
||||
// for cgroup v2.
|
||||
// It's a stateless object which can be used to
|
||||
// update, create or delete any number of cgroups
|
||||
// It relies on runc/libcontainer cgroup managers.
|
||||
type cgroupV2impl struct {
|
||||
cgroupCommon
|
||||
}
|
||||
|
||||
func NewCgroupV2Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager {
|
||||
return &cgroupV2impl{
|
||||
cgroupCommon: newCgroupCommon(cs, cgroupDriver),
|
||||
}
|
||||
}
|
||||
|
||||
// Version of the cgroup implementation on the host
|
||||
func (c *cgroupV2impl) Version() int {
|
||||
return 2
|
||||
}
|
||||
|
||||
// Validate checks if all subsystem cgroups are valid
|
||||
func (c *cgroupV2impl) Validate(name CgroupName) error {
|
||||
cgroupPath := c.buildCgroupUnifiedPath(name)
|
||||
neededControllers := getSupportedUnifiedControllers()
|
||||
enabledControllers, err := readUnifiedControllers(cgroupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err)
|
||||
}
|
||||
difference := neededControllers.Difference(enabledControllers)
|
||||
if difference.Len() > 0 {
|
||||
return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", "))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Exists checks if all subsystem cgroups already exist
|
||||
func (c *cgroupV2impl) Exists(name CgroupName) bool {
|
||||
return c.Validate(name) == nil
|
||||
}
|
||||
|
||||
// MemoryUsage returns the current memory usage of the specified cgroup,
|
||||
// as read from cgroupfs.
|
||||
func (c *cgroupV2impl) MemoryUsage(name CgroupName) (int64, error) {
|
||||
var path, file string
|
||||
path = c.buildCgroupUnifiedPath(name)
|
||||
file = "memory.current"
|
||||
val, err := fscommon.GetCgroupParamUint(path, file)
|
||||
return int64(val), err
|
||||
}
|
||||
|
||||
// GetCgroupConfig returns the resource config values applied to the cgroup for the specified resource type
|
||||
func (c *cgroupV2impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {
|
||||
cgroupPaths := c.buildCgroupPaths(name)
|
||||
cgroupResourcePath, found := cgroupPaths[string(resource)]
|
||||
if !found {
|
||||
return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name)
|
||||
}
|
||||
switch resource {
|
||||
case v1.ResourceCPU:
|
||||
return c.getCgroupCPUConfig(cgroupResourcePath)
|
||||
case v1.ResourceMemory:
|
||||
return c.getCgroupMemoryConfig(cgroupResourcePath)
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name)
|
||||
}
|
||||
|
||||
func (c *cgroupV2impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) {
|
||||
var cpuLimitStr, cpuPeriodStr string
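// The cpu.max file holds two fields, "<quota> <period>"; the quota is either a
// number of microseconds or the literal "max" (no limit), which is mapped to -1 below.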
|
||||
cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %w", cgroupPath, err)
|
||||
}
|
||||
numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr)
|
||||
if errScan != nil || numItems != 2 {
|
||||
return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %w",
|
||||
cpuLimitAndPeriod, cgroupPath, errScan)
|
||||
}
|
||||
cpuLimit := int64(-1)
|
||||
if cpuLimitStr != Cgroup2MaxCpuLimit {
|
||||
cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %w", cgroupPath, err)
|
||||
}
|
||||
}
|
||||
cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64)
|
||||
if errPeriod != nil {
|
||||
return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %w", cgroupPath, errPeriod)
|
||||
}
|
||||
cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight")
|
||||
if errWeight != nil {
|
||||
return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %w", cgroupPath, errWeight)
|
||||
}
|
||||
cpuShares := cpuWeightToCPUShares(cpuWeight)
|
||||
return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil
|
||||
}
|
||||
|
||||
func (c *cgroupV2impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) {
|
||||
return readCgroupMemoryConfig(cgroupPath, cgroupv2MemLimitFile)
|
||||
}
|
||||
|
||||
// getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2
|
||||
func getSupportedUnifiedControllers() sets.Set[string] {
|
||||
// This is the set of controllers used by the Kubelet
|
||||
supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids")
|
||||
// Memoize the set of controllers that are present in the root cgroup
|
||||
availableRootControllersOnce.Do(func() {
|
||||
var err error
|
||||
availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot))
|
||||
}
|
||||
})
|
||||
// Return the set of controllers that are supported both by the Kubelet and by the kernel
|
||||
return supportedControllers.Intersection(availableRootControllers)
|
||||
}
|
||||
|
||||
// readUnifiedControllers reads the controllers available at the specified cgroup
|
||||
func readUnifiedControllers(path string) (sets.Set[string], error) {
|
||||
controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllers := strings.Fields(string(controllersFileContent))
|
||||
return sets.New(controllers...), nil
|
||||
}
|
||||
|
||||
// buildCgroupUnifiedPath builds a path to the specified name.
|
||||
func (c *cgroupV2impl) buildCgroupUnifiedPath(name CgroupName) string {
|
||||
cgroupFsAdaptedName := c.Name(name)
|
||||
return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName)
|
||||
}
|
||||
|
||||
// Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight
|
||||
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2
|
||||
func cpuSharesToCPUWeight(cpuShares uint64) uint64 {
|
||||
return uint64((((cpuShares - 2) * 9999) / 262142) + 1)
|
||||
}
|
||||
|
||||
// Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares
|
||||
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2
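// Note that the mapping is not exactly invertible due to integer division: for
// example cpuSharesToCPUWeight(1024) == 39, while cpuWeightToCPUShares(39) == 998.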
|
||||
func cpuWeightToCPUShares(cpuWeight uint64) uint64 {
|
||||
return uint64((((cpuWeight - 1) * 262142) / 9999) + 2)
|
||||
}
|
283
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager.go
generated
vendored
Normal file
283
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager.go
generated
vendored
Normal file
@ -0,0 +1,283 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
//go:generate mockery
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
|
||||
// TODO: Migrate kubelet to either use its own internal objects or client library.
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/resourceupdates"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
const (
|
||||
// Warning message for the users still using cgroup v1
|
||||
CgroupV1MaintenanceModeWarning = "cgroup v1 support is in maintenance mode, please migrate to cgroup v2"
|
||||
|
||||
// Warning message for users running cgroup v2 on a kernel that doesn't support the root `cpu.stat`.
|
||||
// `cpu.stat` was added to root cgroup in kernel 5.8.
|
||||
// (ref: https://github.com/torvalds/linux/commit/936f2a70f2077f64fab1dcb3eca71879e82ecd3f)
|
||||
CgroupV2KernelWarning = "cgroup v2 is being used on a kernel which doesn't support root `cpu.stat`. " +
"Kubelet will continue, but may experience instability or wrong behavior"
|
||||
)
|
||||
|
||||
type ActivePodsFunc func() []*v1.Pod
|
||||
|
||||
type GetNodeFunc func() (*v1.Node, error)
|
||||
|
||||
// Manages the containers running on a machine.
|
||||
type ContainerManager interface {
|
||||
// Runs the container manager's housekeeping.
|
||||
// - Ensures that the Docker daemon is in a container.
|
||||
// - Creates the system container where all non-containerized processes run.
|
||||
Start(context.Context, *v1.Node, ActivePodsFunc, GetNodeFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error
|
||||
|
||||
// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
|
||||
// These cgroups include the system and Kubernetes services.
|
||||
SystemCgroupsLimit() v1.ResourceList
|
||||
|
||||
// GetNodeConfig returns a NodeConfig that is being used by the container manager.
|
||||
GetNodeConfig() NodeConfig
|
||||
|
||||
// Status returns internal Status.
|
||||
Status() Status
|
||||
|
||||
// NewPodContainerManager is a factory method which returns a podContainerManager object
|
||||
// Returns a noop implementation if qos cgroup hierarchy is not enabled
|
||||
NewPodContainerManager() PodContainerManager
|
||||
|
||||
// GetMountedSubsystems returns the mounted cgroup subsystems on the node
|
||||
GetMountedSubsystems() *CgroupSubsystems
|
||||
|
||||
// GetQOSContainersInfo returns the names of top level QoS containers
|
||||
GetQOSContainersInfo() QOSContainersInfo
|
||||
|
||||
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
|
||||
GetNodeAllocatableReservation() v1.ResourceList
|
||||
|
||||
// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
|
||||
GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList
|
||||
|
||||
// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
|
||||
// node allocatable (amount of total healthy resources reported by device plugin),
|
||||
// and inactive device plugin resources previously registered on the node.
|
||||
GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)
|
||||
|
||||
// UpdateQOSCgroups performs housekeeping updates to ensure that the top
|
||||
// level QoS containers have their desired state in a thread-safe way
|
||||
UpdateQOSCgroups() error
|
||||
|
||||
// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
|
||||
// extended resources required by container.
|
||||
GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)
|
||||
|
||||
// UpdatePluginResources calls Allocate of device plugin handler for potential
|
||||
// requests for device plugin resources, and returns an error if fails.
|
||||
// Otherwise, it updates allocatableResource in nodeInfo if necessary,
|
||||
// to make sure it is at least equal to the pod's requested capacity for
|
||||
// any registered device plugin resource
|
||||
UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error
|
||||
|
||||
InternalContainerLifecycle() InternalContainerLifecycle
|
||||
|
||||
// GetPodCgroupRoot returns the cgroup which contains all pods.
|
||||
GetPodCgroupRoot() string
|
||||
|
||||
// GetPluginRegistrationHandlers returns a set of plugin registration handlers
|
||||
// The pluginwatcher's Handlers allow a single module to handle
|
||||
// registration.
|
||||
GetPluginRegistrationHandlers() map[string]cache.PluginHandler
|
||||
|
||||
// GetHealthCheckers returns a set of health checkers for all plugins.
|
||||
// These checkers are integrated into the systemd watchdog to monitor the service's health.
|
||||
GetHealthCheckers() []healthz.HealthChecker
|
||||
|
||||
// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
|
||||
// due to node recreation.
|
||||
ShouldResetExtendedResourceCapacity() bool
|
||||
|
||||
// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
|
||||
GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler
|
||||
|
||||
// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
|
||||
GetNodeAllocatableAbsolute() v1.ResourceList
|
||||
|
||||
// PrepareDynamicResources prepares dynamic pod resources
|
||||
PrepareDynamicResources(context.Context, *v1.Pod) error
|
||||
|
||||
// UnprepareDynamicResources unprepares dynamic pod resources
|
||||
UnprepareDynamicResources(context.Context, *v1.Pod) error
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||
// might need to unprepare resources.
|
||||
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||
|
||||
// UpdateAllocatedResourcesStatus updates the status of allocated resources for the pod.
|
||||
UpdateAllocatedResourcesStatus(pod *v1.Pod, status *v1.PodStatus)
|
||||
|
||||
// Updates returns a channel that receives an Update when the device changed its status.
|
||||
Updates() <-chan resourceupdates.Update
|
||||
|
||||
// Implements the PodResources Provider API
|
||||
podresources.CPUsProvider
|
||||
podresources.DevicesProvider
|
||||
podresources.MemoryProvider
|
||||
podresources.DynamicResourcesProvider
|
||||
}
|
||||
|
||||
type NodeConfig struct {
|
||||
NodeName types.NodeName
|
||||
RuntimeCgroupsName string
|
||||
SystemCgroupsName string
|
||||
KubeletCgroupsName string
|
||||
KubeletOOMScoreAdj int32
|
||||
ContainerRuntime string
|
||||
CgroupsPerQOS bool
|
||||
CgroupRoot string
|
||||
CgroupDriver string
|
||||
KubeletRootDir string
|
||||
ProtectKernelDefaults bool
|
||||
NodeAllocatableConfig
|
||||
QOSReserved map[v1.ResourceName]int64
|
||||
CPUManagerPolicy string
|
||||
CPUManagerPolicyOptions map[string]string
|
||||
TopologyManagerScope string
|
||||
CPUManagerReconcilePeriod time.Duration
|
||||
ExperimentalMemoryManagerPolicy string
|
||||
ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
|
||||
PodPidsLimit int64
|
||||
EnforceCPULimits bool
|
||||
CPUCFSQuotaPeriod time.Duration
|
||||
TopologyManagerPolicy string
|
||||
TopologyManagerPolicyOptions map[string]string
|
||||
CgroupVersion int
|
||||
}
|
||||
|
||||
type NodeAllocatableConfig struct {
|
||||
KubeReservedCgroupName string
|
||||
SystemReservedCgroupName string
|
||||
ReservedSystemCPUs cpuset.CPUSet
|
||||
EnforceNodeAllocatable sets.Set[string]
|
||||
KubeReserved v1.ResourceList
|
||||
SystemReserved v1.ResourceList
|
||||
HardEvictionThresholds []evictionapi.Threshold
|
||||
}
|
||||
|
||||
type Status struct {
|
||||
// Any soft requirements that were unsatisfied.
|
||||
SoftRequirements error
|
||||
}
|
||||
|
||||
func int64Slice(in []int) []int64 {
|
||||
out := make([]int64, len(in))
|
||||
for i := range in {
|
||||
out[i] = int64(in[i])
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// parsePercentage parses a percentage string (for example "50%") into its numeric value.
|
||||
func parsePercentage(v string) (int64, error) {
|
||||
if !strings.HasSuffix(v, "%") {
|
||||
return 0, fmt.Errorf("percentage expected, got '%s'", v)
|
||||
}
|
||||
percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid number in percentage '%s'", v)
|
||||
}
|
||||
if percentage < 0 || percentage > 100 {
|
||||
return 0, fmt.Errorf("percentage must be between 0 and 100")
|
||||
}
|
||||
return percentage, nil
|
||||
}
|
||||
|
||||
// ParseQOSReserved parses the --qos-reserved option
|
||||
func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) {
|
||||
reservations := make(map[v1.ResourceName]int64)
|
||||
for k, v := range m {
|
||||
switch v1.ResourceName(k) {
|
||||
// Only memory resources are supported.
|
||||
case v1.ResourceMemory:
|
||||
q, err := parsePercentage(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse percentage %q for %q resource: %w", v, k, err)
|
||||
}
|
||||
reservations[v1.ResourceName(k)] = q
|
||||
default:
|
||||
return nil, fmt.Errorf("cannot reserve %q resource", k)
|
||||
}
|
||||
}
|
||||
return &reservations, nil
|
||||
}
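// Illustrative sketch, not part of the vendored file: how ParseQOSReserved and
// parsePercentage behave for a kubelet started with --qos-reserved=memory=50%.
// The values below follow directly from the code above.
//
//	reserved, err := ParseQOSReserved(map[string]string{"memory": "50%"})
//	// err == nil, (*reserved)[v1.ResourceMemory] == 50
//
//	_, err = ParseQOSReserved(map[string]string{"cpu": "50%"})
//	// err != nil: only the memory resource can currently be reserved
//
//	_, err = parsePercentage("150%")
//	// err != nil: percentages must be between 0 and 100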
|
||||
|
||||
func containerDevicesFromResourceDeviceInstances(devs devicemanager.ResourceDeviceInstances) []*podresourcesapi.ContainerDevices {
|
||||
var respDevs []*podresourcesapi.ContainerDevices
|
||||
|
||||
for resourceName, resourceDevs := range devs {
|
||||
for devID, dev := range resourceDevs {
|
||||
topo := dev.GetTopology()
|
||||
if topo == nil {
|
||||
// Some device plugins do not report topology information.
// This is legal, so we report the devices anyway and
// let the client decide what to do.
|
||||
respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
|
||||
ResourceName: resourceName,
|
||||
DeviceIds: []string{devID},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
for _, node := range topo.GetNodes() {
|
||||
respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
|
||||
ResourceName: resourceName,
|
||||
DeviceIds: []string{devID},
|
||||
Topology: &podresourcesapi.TopologyInfo{
|
||||
Nodes: []*podresourcesapi.NUMANode{
|
||||
{
|
||||
ID: node.GetID(),
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return respDevs
|
||||
}
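// Illustrative sketch, not part of the vendored file: a device that reports a
// topology spanning NUMA nodes 0 and 1 is flattened into two ContainerDevices
// entries, one per node, both carrying the same resource name and device ID;
// a device without topology information yields a single entry with no Topology.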
|
1049
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager_linux.go
generated
vendored
Normal file
File diff suppressed because it is too large
211
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager_stub.go
generated
vendored
Normal file
@ -0,0 +1,211 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/resourceupdates"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
type containerManagerStub struct {
|
||||
shouldResetExtendedResourceCapacity bool
|
||||
extendedPluginResources v1.ResourceList
|
||||
}
|
||||
|
||||
var _ ContainerManager = &containerManagerStub{}
|
||||
|
||||
func (cm *containerManagerStub) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ GetNodeFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
|
||||
klog.V(2).InfoS("Starting stub container manager")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) SystemCgroupsLimit() v1.ResourceList {
|
||||
return v1.ResourceList{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetNodeConfig() NodeConfig {
|
||||
return NodeConfig{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetMountedSubsystems() *CgroupSubsystems {
|
||||
return &CgroupSubsystems{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetQOSContainersInfo() QOSContainersInfo {
|
||||
return QOSContainersInfo{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) UpdateQOSCgroups() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) Status() Status {
|
||||
return Status{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
|
||||
if !localStorageCapacityIsolation {
|
||||
return v1.ResourceList{}
|
||||
}
|
||||
c := v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewQuantity(
|
||||
int64(0),
|
||||
resource.BinarySI),
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetPluginRegistrationHandlers() map[string]cache.PluginHandler {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetHealthCheckers() []healthz.HealthChecker {
|
||||
return []healthz.HealthChecker{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||
return cm.extendedPluginResources, cm.extendedPluginResources, []string{}
|
||||
}
|
||||
|
||||
func (m *podContainerManagerStub) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (m *podContainerManagerStub) SetPodCgroupConfig(pod *v1.Pod, resourceConfig *ResourceConfig) error {
|
||||
return fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
|
||||
return &podContainerManagerStub{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
|
||||
return &kubecontainer.RunContainerOptions{}, nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) InternalContainerLifecycle() InternalContainerLifecycle {
|
||||
return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()}
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetPodCgroupRoot() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetDevices(_, _ string) []*podresourcesapi.ContainerDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetAllocatableDevices() []*podresourcesapi.ContainerDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) ShouldResetExtendedResourceCapacity() bool {
|
||||
return cm.shouldResetExtendedResourceCapacity
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
|
||||
return topologymanager.NewFakeManager()
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) UpdateAllocatedDevices() {
|
||||
return
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetCPUs(_, _ string) []int64 {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetAllocatableCPUs() []int64 {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetMemory(_, _ string) []*podresourcesapi.ContainerMemory {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetAllocatableMemory() []*podresourcesapi.ContainerMemory {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) GetNodeAllocatableAbsolute() v1.ResourceList {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) UpdateAllocatedResourcesStatus(pod *v1.Pod, status *v1.PodStatus) {
|
||||
}
|
||||
|
||||
func (cm *containerManagerStub) Updates() <-chan resourceupdates.Update {
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewStubContainerManager() ContainerManager {
|
||||
return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
|
||||
}
|
||||
|
||||
func NewStubContainerManagerWithExtendedResource(shouldResetExtendedResourceCapacity bool) ContainerManager {
|
||||
return &containerManagerStub{shouldResetExtendedResourceCapacity: shouldResetExtendedResourceCapacity}
|
||||
}
|
||||
|
||||
func NewStubContainerManagerWithDevicePluginResource(extendedPluginResources v1.ResourceList) ContainerManager {
|
||||
return &containerManagerStub{
|
||||
shouldResetExtendedResourceCapacity: false,
|
||||
extendedPluginResources: extendedPluginResources,
|
||||
}
|
||||
}
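// Illustrative sketch, not part of the vendored file: the stub is meant for
// tests and for builds without a real container manager, e.g.
//
//	cm := NewStubContainerManager()
//	capacity, allocatable, inactive := cm.GetDevicePluginResourceCapacity()
//	// capacity and allocatable are nil, inactive is an empty slice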
|
49
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
//go:build !linux && !windows
|
||||
// +build !linux,!windows
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"k8s.io/mount-utils"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/tools/record"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
)
|
||||
|
||||
type unsupportedContainerManager struct {
|
||||
containerManagerStub
|
||||
}
|
||||
|
||||
var _ ContainerManager = &unsupportedContainerManager{}
|
||||
|
||||
func (unsupportedContainerManager) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ GetNodeFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
|
||||
return fmt.Errorf("Container Manager is unsupported in this build")
|
||||
}
|
||||
|
||||
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, recorder record.EventRecorder, kubeClient clientset.Interface) (ContainerManager, error) {
|
||||
return &unsupportedContainerManager{}, nil
|
||||
}
|
371
vendor/k8s.io/kubernetes/pkg/kubelet/cm/container_manager_windows.go
generated
vendored
Normal file
@ -0,0 +1,371 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// containerManagerImpl implements container manager on Windows.
|
||||
// Only GetNodeAllocatableReservation() and GetCapacity() are implemented now.
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/mount-utils"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/tools/record"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/resourceupdates"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
type containerManagerImpl struct {
|
||||
// Capacity of this node.
|
||||
capacity v1.ResourceList
|
||||
// Interface for cadvisor.
|
||||
cadvisorInterface cadvisor.Interface
|
||||
// Config of this node.
|
||||
nodeConfig NodeConfig
|
||||
// Interface for exporting and allocating devices reported by device plugins.
|
||||
deviceManager devicemanager.Manager
|
||||
// Interface for Topology resource co-ordination
|
||||
topologyManager topologymanager.Manager
|
||||
cpuManager cpumanager.Manager
|
||||
memoryManager memorymanager.Manager
|
||||
nodeInfo *v1.Node
|
||||
sync.RWMutex
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) Start(ctx context.Context, node *v1.Node,
|
||||
activePods ActivePodsFunc,
|
||||
getNode GetNodeFunc,
|
||||
sourcesReady config.SourcesReady,
|
||||
podStatusProvider status.PodStatusProvider,
|
||||
runtimeService internalapi.RuntimeService,
|
||||
localStorageCapacityIsolation bool) error {
|
||||
klog.V(2).InfoS("Starting Windows container manager")
|
||||
|
||||
cm.nodeInfo = node
|
||||
|
||||
if localStorageCapacityIsolation {
|
||||
rootfs, err := cm.cadvisorInterface.RootFsInfo()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get rootfs info: %v", err)
|
||||
}
|
||||
for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
|
||||
cm.capacity[rName] = rCap
|
||||
}
|
||||
}
|
||||
|
||||
containerMap, containerRunningSet := buildContainerMapAndRunningSetFromRuntime(ctx, runtimeService)
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
|
||||
err := cm.cpuManager.Start(cpumanager.ActivePodsFunc(activePods), sourcesReady, podStatusProvider, runtimeService, containerMap.Clone())
|
||||
if err != nil {
|
||||
return fmt.Errorf("start cpu manager error: %v", err)
|
||||
}
|
||||
|
||||
// Initialize memory manager
|
||||
err = cm.memoryManager.Start(memorymanager.ActivePodsFunc(activePods), sourcesReady, podStatusProvider, runtimeService, containerMap.Clone())
|
||||
if err != nil {
|
||||
return fmt.Errorf("start memory manager error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Starts device manager.
|
||||
if err := cm.deviceManager.Start(devicemanager.ActivePodsFunc(activePods), sourcesReady, containerMap.Clone(), containerRunningSet); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewContainerManager creates windows container manager.
|
||||
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder, kubeClient clientset.Interface) (ContainerManager, error) {
|
||||
// It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because
|
||||
// machine info is computed and cached once as part of cAdvisor object creation.
|
||||
// But `RootFsInfo` and `ImagesFsInfo` are not available at this moment, so they will be called later when the manager starts.
|
||||
machineInfo, err := cadvisorInterface.MachineInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
capacity := cadvisor.CapacityFromMachineInfo(machineInfo)
|
||||
|
||||
cm := &containerManagerImpl{
|
||||
capacity: capacity,
|
||||
nodeConfig: nodeConfig,
|
||||
cadvisorInterface: cadvisorInterface,
|
||||
}
|
||||
|
||||
cm.topologyManager = topologymanager.NewFakeManager()
|
||||
cm.cpuManager = cpumanager.NewFakeManager()
|
||||
cm.memoryManager = memorymanager.NewFakeManager()
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
|
||||
klog.InfoS("Creating topology manager")
|
||||
cm.topologyManager, err = topologymanager.NewManager(machineInfo.Topology,
|
||||
nodeConfig.TopologyManagerPolicy,
|
||||
nodeConfig.TopologyManagerScope,
|
||||
nodeConfig.TopologyManagerPolicyOptions)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to initialize topology manager")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
klog.InfoS("Creating cpu manager")
|
||||
cm.cpuManager, err = cpumanager.NewManager(
|
||||
nodeConfig.CPUManagerPolicy,
|
||||
nodeConfig.CPUManagerPolicyOptions,
|
||||
nodeConfig.CPUManagerReconcilePeriod,
|
||||
machineInfo,
|
||||
nodeConfig.NodeAllocatableConfig.ReservedSystemCPUs,
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to initialize cpu manager")
|
||||
return nil, err
|
||||
}
|
||||
cm.topologyManager.AddHintProvider(cm.cpuManager)
|
||||
|
||||
klog.InfoS("Creating memory manager")
|
||||
cm.memoryManager, err = memorymanager.NewManager(
|
||||
nodeConfig.ExperimentalMemoryManagerPolicy,
|
||||
machineInfo,
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.ExperimentalMemoryManagerReservedMemory,
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to initialize memory manager")
|
||||
return nil, err
|
||||
}
|
||||
cm.topologyManager.AddHintProvider(cm.memoryManager)
|
||||
}
|
||||
|
||||
klog.InfoS("Creating device plugin manager")
|
||||
cm.deviceManager, err = devicemanager.NewManagerImpl(nil, cm.topologyManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cm.topologyManager.AddHintProvider(cm.deviceManager)
|
||||
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
|
||||
return v1.ResourceList{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
|
||||
cm.RLock()
|
||||
defer cm.RUnlock()
|
||||
return cm.nodeConfig
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {
|
||||
return &CgroupSubsystems{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetQOSContainersInfo() QOSContainersInfo {
|
||||
return QOSContainersInfo{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UpdateQOSCgroups() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) Status() Status {
|
||||
return Status{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
|
||||
evictionReservation := hardEvictionReservation(cm.nodeConfig.HardEvictionThresholds, cm.capacity)
|
||||
result := make(v1.ResourceList)
|
||||
for k := range cm.capacity {
|
||||
value := resource.NewQuantity(0, resource.DecimalSI)
|
||||
if cm.nodeConfig.SystemReserved != nil {
|
||||
value.Add(cm.nodeConfig.SystemReserved[k])
|
||||
}
|
||||
if cm.nodeConfig.KubeReserved != nil {
|
||||
value.Add(cm.nodeConfig.KubeReserved[k])
|
||||
}
|
||||
if evictionReservation != nil {
|
||||
value.Add(evictionReservation[k])
|
||||
}
|
||||
if !value.IsZero() {
|
||||
result[k] = *value
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
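// Illustrative sketch, not part of the vendored file: with SystemReserved
// cpu=500m and KubeReserved cpu=500m, and no hard eviction threshold for CPU,
// the reservation returned above contains cpu=1 (500m + 500m); resources whose
// combined reservation is zero are omitted from the result.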
|
||||
|
||||
func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
|
||||
return cm.capacity
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetPluginRegistrationHandlers() map[string]cache.PluginHandler {
|
||||
// DRA is not supported on Windows; only the device plugin is supported.
|
||||
return map[string]cache.PluginHandler{pluginwatcherapi.DevicePlugin: cm.deviceManager.GetWatcherHandler()}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetHealthCheckers() []healthz.HealthChecker {
|
||||
return []healthz.HealthChecker{cm.deviceManager.GetHealthChecker()}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||
return cm.deviceManager.GetCapacity()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
|
||||
return &podContainerManagerStub{}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
|
||||
opts := &kubecontainer.RunContainerOptions{}
|
||||
// Allocate should already be called during predicateAdmitHandler.Admit(),
|
||||
// so just try to fetch device runtime information from the cached state here.
|
||||
devOpts, err := cm.deviceManager.GetDeviceRunContainerOptions(pod, container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if devOpts == nil {
|
||||
return opts, nil
|
||||
}
|
||||
opts.Devices = append(opts.Devices, devOpts.Devices...)
|
||||
opts.Mounts = append(opts.Mounts, devOpts.Mounts...)
|
||||
opts.Envs = append(opts.Envs, devOpts.Envs...)
|
||||
opts.Annotations = append(opts.Annotations, devOpts.Annotations...)
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UpdateAllocatedResourcesStatus(pod *v1.Pod, status *v1.PodStatus) {
|
||||
// For now we only support Device Plugin
|
||||
|
||||
cm.deviceManager.UpdateAllocatedResourcesStatus(pod, status)
|
||||
|
||||
// TODO(SergeyKanzhelev, https://kep.k8s.io/4680): add support for DRA resources when DRA supports Windows
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) Updates() <-chan resourceupdates.Update {
|
||||
// TODO(SergeyKanzhelev, https://kep.k8s.io/4680): add support for DRA resources, for now only use device plugin updates
|
||||
return cm.deviceManager.Updates()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
||||
return cm.deviceManager.UpdatePluginResources(node, attrs)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
|
||||
return &internalContainerLifecycleImpl{cm.cpuManager, cm.memoryManager, cm.topologyManager}
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetPodCgroupRoot() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices {
|
||||
return containerDevicesFromResourceDeviceInstances(cm.deviceManager.GetDevices(podUID, containerName))
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetAllocatableDevices() []*podresourcesapi.ContainerDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool {
|
||||
return cm.deviceManager.ShouldResetExtendedResourceCapacity()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
|
||||
return cm.topologyManager
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UpdateAllocatedDevices() {
|
||||
return
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetCPUs(podUID, containerName string) []int64 {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
|
||||
if cm.cpuManager != nil {
|
||||
return int64Slice(cm.cpuManager.GetExclusiveCPUs(podUID, containerName).UnsortedList())
|
||||
}
|
||||
return []int64{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetAllocatableCPUs() []int64 {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
|
||||
if cm.cpuManager != nil {
|
||||
return int64Slice(cm.cpuManager.GetAllocatableCPUs().UnsortedList())
|
||||
}
|
||||
return []int64{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetMemory(_, _ string) []*podresourcesapi.ContainerMemory {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetAllocatableMemory() []*podresourcesapi.ContainerMemory {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetNodeAllocatableAbsolute() v1.ResourceList {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||
return false
|
||||
}
|
90
vendor/k8s.io/kubernetes/pkg/kubelet/cm/containermap/container_map.go
generated
vendored
Normal file
@ -0,0 +1,90 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package containermap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// cmItem (ContainerMap ITEM) is a (podUID, containerName) pair.
|
||||
type cmItem struct {
|
||||
podUID string
|
||||
containerName string
|
||||
}
|
||||
|
||||
// ContainerMap maps (containerID)->(podUID, containerName)
|
||||
type ContainerMap map[string]cmItem
|
||||
|
||||
// NewContainerMap creates a new ContainerMap struct
|
||||
func NewContainerMap() ContainerMap {
|
||||
return make(ContainerMap)
|
||||
}
|
||||
|
||||
// Clone creates a deep copy of the ContainerMap
|
||||
func (cm ContainerMap) Clone() ContainerMap {
|
||||
ret := make(ContainerMap, len(cm))
|
||||
for key, val := range cm {
|
||||
ret[key] = val
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Add adds a mapping of (containerID)->(podUID, containerName) to the ContainerMap
|
||||
func (cm ContainerMap) Add(podUID, containerName, containerID string) {
|
||||
cm[containerID] = cmItem{
|
||||
podUID: podUID,
|
||||
containerName: containerName,
|
||||
}
|
||||
}
|
||||
|
||||
// RemoveByContainerID removes a mapping of (containerID)->(podUID, containerName) from the ContainerMap
|
||||
func (cm ContainerMap) RemoveByContainerID(containerID string) {
|
||||
delete(cm, containerID)
|
||||
}
|
||||
|
||||
// RemoveByContainerRef removes a mapping of (containerID)->(podUID, containerName) from the ContainerMap
|
||||
func (cm ContainerMap) RemoveByContainerRef(podUID, containerName string) {
|
||||
containerID, err := cm.GetContainerID(podUID, containerName)
|
||||
if err == nil {
|
||||
cm.RemoveByContainerID(containerID)
|
||||
}
|
||||
}
|
||||
|
||||
// GetContainerID retrieves a ContainerID from the ContainerMap
|
||||
func (cm ContainerMap) GetContainerID(podUID, containerName string) (string, error) {
|
||||
for key, val := range cm {
|
||||
if val.podUID == podUID && val.containerName == containerName {
|
||||
return key, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("container %s not in ContainerMap for pod %s", containerName, podUID)
|
||||
}
|
||||
|
||||
// GetContainerRef retrieves a (podUID, containerName) pair from the ContainerMap
|
||||
func (cm ContainerMap) GetContainerRef(containerID string) (string, string, error) {
|
||||
if _, exists := cm[containerID]; !exists {
|
||||
return "", "", fmt.Errorf("containerID %s not in ContainerMap", containerID)
|
||||
}
|
||||
return cm[containerID].podUID, cm[containerID].containerName, nil
|
||||
}
|
||||
|
||||
// Visit invokes the visitor function on all of the entries in the container map.
|
||||
func (cm ContainerMap) Visit(visitor func(podUID, containerName, containerID string)) {
|
||||
for k, v := range cm {
|
||||
visitor(v.podUID, v.containerName, k)
|
||||
}
|
||||
}
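// Illustrative sketch, not part of the vendored file: typical use of ContainerMap.
// The pod UID and container ID values below are hypothetical.
//
//	m := NewContainerMap()
//	m.Add("pod-uid-1", "nginx", "containerd://abc123")
//	id, err := m.GetContainerID("pod-uid-1", "nginx") // "containerd://abc123", nil
//	m.Visit(func(podUID, containerName, containerID string) {
//		// walks every (podUID, containerName, containerID) entry
//	})
//	m.RemoveByContainerID(id)
//	_ = err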
|
10
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/OWNERS
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- derekwaynecarr
|
||||
reviewers:
|
||||
- klueska
|
||||
emeritus_approvers:
|
||||
- balajismaniam
|
||||
- ConnorDoyle
|
||||
- vishh
|
1100
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/cpu_assignment.go
generated
vendored
Normal file
File diff suppressed because it is too large
525
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/cpu_manager.go
generated
vendored
Normal file
@ -0,0 +1,525 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
type ActivePodsFunc func() []*v1.Pod
|
||||
|
||||
type runtimeService interface {
|
||||
UpdateContainerResources(ctx context.Context, id string, resources *runtimeapi.ContainerResources) error
|
||||
}
|
||||
|
||||
type policyName string
|
||||
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error
|
||||
|
||||
// Called to trigger the allocation of CPUs to a container. This must be
|
||||
// called at some point prior to the AddContainer() call for a container,
|
||||
// e.g. at pod admission time.
|
||||
Allocate(pod *v1.Pod, container *v1.Container) error
|
||||
|
||||
// AddContainer adds a mapping from the container ID to the pod UID and container name.
// The mapping is used to remove the CPU allocation when the container is removed.
|
||||
AddContainer(p *v1.Pod, c *v1.Container, containerID string)
|
||||
|
||||
// RemoveContainer is called after Kubelet decides to kill or delete a
|
||||
// container. After this call, the CPU manager stops trying to reconcile
|
||||
// that container and any CPUs dedicated to the container are freed.
|
||||
RemoveContainer(containerID string) error
|
||||
|
||||
// State returns a read-only interface to the internal CPU manager state.
|
||||
State() state.Reader
|
||||
|
||||
// GetTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment among this
|
||||
// and other resource controllers.
|
||||
GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint
|
||||
|
||||
// GetExclusiveCPUs implements the podresources.CPUsProvider interface to provide
|
||||
// exclusively allocated cpus for the container
|
||||
GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet
|
||||
|
||||
// GetPodTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment per Pod
|
||||
// among this and other resource controllers.
|
||||
GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint
|
||||
|
||||
// GetAllocatableCPUs returns the total set of CPUs available for allocation.
|
||||
GetAllocatableCPUs() cpuset.CPUSet
|
||||
|
||||
// GetCPUAffinity returns cpuset which includes cpus from shared pools
|
||||
// as well as exclusively allocated cpus
|
||||
GetCPUAffinity(podUID, containerName string) cpuset.CPUSet
|
||||
|
||||
// GetAllCPUs returns all the CPUs known by cpumanager, as reported by the
|
||||
// hardware discovery. Maps to the CPU capacity.
|
||||
GetAllCPUs() cpuset.CPUSet
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
sync.Mutex
|
||||
policy Policy
|
||||
|
||||
// reconcilePeriod is the duration between calls to reconcileState.
|
||||
reconcilePeriod time.Duration
|
||||
|
||||
// state allows pluggable CPU assignment policies while sharing a common
|
||||
// representation of state for the system to inspect and reconcile.
|
||||
state state.State
|
||||
|
||||
// lastUpdateState holds state for each container from the last time it was updated.
|
||||
lastUpdateState state.State
|
||||
|
||||
// containerRuntime is the container runtime service interface needed
|
||||
// to make UpdateContainerResources() calls against the containers.
|
||||
containerRuntime runtimeService
|
||||
|
||||
// activePods is a method for listing active pods on the node
|
||||
// so all the containers can be updated in the reconciliation loop.
|
||||
activePods ActivePodsFunc
|
||||
|
||||
// podStatusProvider provides a method for obtaining pod statuses
|
||||
// and the containerID of their containers
|
||||
podStatusProvider status.PodStatusProvider
|
||||
|
||||
// containerMap provides a mapping from (pod, container) -> containerID
|
||||
// for all containers in a pod
|
||||
containerMap containermap.ContainerMap
|
||||
|
||||
topology *topology.CPUTopology
|
||||
|
||||
nodeAllocatableReservation v1.ResourceList
|
||||
|
||||
// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
|
||||
// We use it to determine when we can purge inactive pods from checkpointed state.
|
||||
sourcesReady config.SourcesReady
|
||||
|
||||
// stateFileDirectory holds the directory where the state file for checkpoints is held.
|
||||
stateFileDirectory string
|
||||
|
||||
// allCPUs is the set of online CPUs as reported by the system
|
||||
allCPUs cpuset.CPUSet
|
||||
|
||||
// allocatableCPUs is the set of online CPUs as reported by the system,
|
||||
// and available for allocation, minus the reserved set
|
||||
allocatableCPUs cpuset.CPUSet
|
||||
}
|
||||
|
||||
var _ Manager = &manager{}
|
||||
|
||||
type sourcesReadyStub struct{}
|
||||
|
||||
func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates a new CPU manager based on the provided policy.
|
||||
func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
var err error
|
||||
|
||||
topo, err = topology.Discover(machineInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch policyName(cpuPolicyName) {
|
||||
|
||||
case PolicyNone:
|
||||
policy, err = NewNonePolicy(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new none policy error: %w", err)
|
||||
}
|
||||
|
||||
case PolicyStatic:
|
||||
klog.InfoS("Detected CPU topology", "topology", topo)
|
||||
|
||||
reservedCPUs, ok := nodeAllocatableReservation[v1.ResourceCPU]
|
||||
if !ok {
|
||||
// The static policy cannot initialize without this information.
|
||||
return nil, fmt.Errorf("[cpumanager] unable to determine reserved CPU resources for static policy")
|
||||
}
|
||||
if reservedCPUs.IsZero() {
|
||||
// The static policy requires this to be nonzero. Zero CPU reservation
|
||||
// would allow the shared pool to be completely exhausted. At that point
|
||||
// either we would violate our guarantee of exclusivity or need to evict
|
||||
// any pod that has at least one container that requires zero CPUs.
|
||||
// See the comments in policy_static.go for more details.
|
||||
return nil, fmt.Errorf("[cpumanager] the static policy requires systemreserved.cpu + kubereserved.cpu to be greater than zero")
|
||||
}
|
||||
|
||||
// Take the ceiling of the reservation, since fractional CPUs cannot be
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||
}
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown policy: \"%s\"", cpuPolicyName)
|
||||
}
|
||||
|
||||
manager := &manager{
|
||||
policy: policy,
|
||||
reconcilePeriod: reconcilePeriod,
|
||||
lastUpdateState: state.NewMemoryState(),
|
||||
topology: topo,
|
||||
nodeAllocatableReservation: nodeAllocatableReservation,
|
||||
stateFileDirectory: stateFileDirectory,
|
||||
allCPUs: topo.CPUDetails.CPUs(),
|
||||
}
|
||||
manager.sourcesReady = &sourcesReadyStub{}
|
||||
return manager, nil
|
||||
}
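// Illustrative sketch, not part of the vendored file: with the static policy,
// fractional CPU reservations are rounded up before being handed to the policy.
// For example, a node allocatable reservation of cpu=1500m gives
// numReservedCPUs = int(math.Ceil(1.5)) = 2, so two whole CPUs stay out of the
// exclusive-allocation pool; a reservation of exactly zero is rejected above.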
|
||||
|
||||
func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
|
||||
klog.InfoS("Starting CPU manager", "policy", m.policy.Name())
|
||||
klog.InfoS("Reconciling", "reconcilePeriod", m.reconcilePeriod)
|
||||
m.sourcesReady = sourcesReady
|
||||
m.activePods = activePods
|
||||
m.podStatusProvider = podStatusProvider
|
||||
m.containerRuntime = containerRuntime
|
||||
m.containerMap = initialContainers
|
||||
|
||||
stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, cpuManagerStateFileName, m.policy.Name(), m.containerMap)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Could not initialize checkpoint manager, please drain node and remove policy state file")
|
||||
return err
|
||||
}
|
||||
m.state = stateImpl
|
||||
|
||||
err = m.policy.Start(m.state)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Policy start error")
|
||||
return err
|
||||
}
|
||||
|
||||
m.allocatableCPUs = m.policy.GetAllocatableCPUs(m.state)
|
||||
|
||||
if m.policy.Name() == string(PolicyNone) {
|
||||
return nil
|
||||
}
|
||||
// Periodically call m.reconcileState() to keep the CPU sets of all pods in
// sync with the guaranteed CPUs handed out to them.
|
||||
go wait.Until(func() { m.reconcileState() }, m.reconcilePeriod, wait.NeverStop)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
|
||||
// Garbage collect any stranded resources before allocating CPUs.
|
||||
m.removeStaleState()
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
// Call down into the policy to assign this container CPUs if required.
|
||||
err := m.policy.Allocate(m.state, p, c)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Allocate error")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
if cset, exists := m.state.GetCPUSet(string(pod.UID), container.Name); exists {
|
||||
m.lastUpdateState.SetCPUSet(string(pod.UID), container.Name, cset)
|
||||
}
|
||||
m.containerMap.Add(string(pod.UID), container.Name, containerID)
|
||||
}
|
||||
|
||||
func (m *manager) RemoveContainer(containerID string) error {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
err := m.policyRemoveContainerByID(containerID)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "RemoveContainer error")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) policyRemoveContainerByID(containerID string) error {
|
||||
podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = m.policy.RemoveContainer(m.state, podUID, containerName)
|
||||
if err == nil {
|
||||
m.lastUpdateState.Delete(podUID, containerName)
|
||||
m.containerMap.RemoveByContainerID(containerID)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) error {
|
||||
err := m.policy.RemoveContainer(m.state, podUID, containerName)
|
||||
if err == nil {
|
||||
m.lastUpdateState.Delete(podUID, containerName)
|
||||
m.containerMap.RemoveByContainerRef(podUID, containerName)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *manager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
// Garbage collect any stranded resources before providing TopologyHints
|
||||
m.removeStaleState()
|
||||
// Delegate to active policy
|
||||
return m.policy.GetTopologyHints(m.state, pod, container)
|
||||
}
|
||||
|
||||
func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
// Garbage collect any stranded resources before providing TopologyHints
|
||||
m.removeStaleState()
|
||||
// Delegate to active policy
|
||||
return m.policy.GetPodTopologyHints(m.state, pod)
|
||||
}
|
||||
|
||||
func (m *manager) GetAllocatableCPUs() cpuset.CPUSet {
|
||||
return m.allocatableCPUs.Clone()
|
||||
}
|
||||
|
||||
func (m *manager) GetAllCPUs() cpuset.CPUSet {
|
||||
return m.allCPUs.Clone()
|
||||
}
|
||||
|
||||
type reconciledContainer struct {
|
||||
podName string
|
||||
containerName string
|
||||
containerID string
|
||||
}
|
||||
|
||||
func (m *manager) removeStaleState() {
|
||||
// Only once all sources are ready do we attempt to remove any stale state.
|
||||
// This ensures that the call to `m.activePods()` below will succeed with
|
||||
// the actual active pods list.
|
||||
if !m.sourcesReady.AllReady() {
|
||||
return
|
||||
}
|
||||
|
||||
// We grab the lock to ensure that no new containers will grab CPUs while
|
||||
// executing the code below. Without this lock, it's possible that we end up
|
||||
// removing state that is newly added by an asynchronous call to
|
||||
// AddContainer() during the execution of this code.
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
// Get the list of active pods.
|
||||
activePods := m.activePods()
|
||||
|
||||
// Build a list of (podUID, containerName) pairs for all containers in all active Pods.
|
||||
activeContainers := make(map[string]map[string]struct{})
|
||||
for _, pod := range activePods {
|
||||
activeContainers[string(pod.UID)] = make(map[string]struct{})
|
||||
for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
|
||||
activeContainers[string(pod.UID)][container.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// Loop through the CPUManager state. Remove any state for containers not
|
||||
// in the `activeContainers` list built above.
|
||||
assignments := m.state.GetCPUAssignments()
|
||||
for podUID := range assignments {
|
||||
for containerName := range assignments[podUID] {
|
||||
if _, ok := activeContainers[podUID][containerName]; ok {
|
||||
klog.V(5).InfoS("RemoveStaleState: container still active", "podUID", podUID, "containerName", containerName)
|
||||
continue
|
||||
}
|
||||
klog.V(2).InfoS("RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
|
||||
err := m.policyRemoveContainerByRef(podUID, containerName)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m.containerMap.Visit(func(podUID, containerName, containerID string) {
|
||||
if _, ok := activeContainers[podUID][containerName]; ok {
|
||||
klog.V(5).InfoS("RemoveStaleState: containerMap: container still active", "podUID", podUID, "containerName", containerName)
|
||||
return
|
||||
}
|
||||
klog.V(2).InfoS("RemoveStaleState: containerMap: removing container", "podUID", podUID, "containerName", containerName)
|
||||
err := m.policyRemoveContainerByRef(podUID, containerName)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "RemoveStaleState: containerMap: failed to remove container", "podUID", podUID, "containerName", containerName)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (m *manager) reconcileState() (success []reconciledContainer, failure []reconciledContainer) {
|
||||
ctx := context.Background()
|
||||
success = []reconciledContainer{}
|
||||
failure = []reconciledContainer{}
|
||||
|
||||
m.removeStaleState()
|
||||
for _, pod := range m.activePods() {
|
||||
pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID)
|
||||
if !ok {
|
||||
klog.V(5).InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
|
||||
failure = append(failure, reconciledContainer{pod.Name, "", ""})
|
||||
continue
|
||||
}
|
||||
|
||||
allContainers := pod.Spec.InitContainers
|
||||
allContainers = append(allContainers, pod.Spec.Containers...)
|
||||
for _, container := range allContainers {
|
||||
containerID, err := findContainerIDByName(&pstatus, container.Name)
|
||||
if err != nil {
|
||||
klog.V(5).InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
|
||||
continue
|
||||
}
|
||||
|
||||
cstatus, err := findContainerStatusByName(&pstatus, container.Name)
|
||||
if err != nil {
|
||||
klog.V(5).InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
|
||||
continue
|
||||
}
|
||||
|
||||
if cstatus.State.Waiting != nil ||
|
||||
(cstatus.State.Waiting == nil && cstatus.State.Running == nil && cstatus.State.Terminated == nil) {
|
||||
klog.V(4).InfoS("ReconcileState: skipping container; container still in the waiting state", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
|
||||
continue
|
||||
}
|
||||
|
||||
m.Lock()
|
||||
if cstatus.State.Terminated != nil {
|
||||
// The container is terminated but we can't call m.RemoveContainer()
|
||||
// here because it could remove the allocated cpuset for the container
|
||||
// which may be in the process of being restarted. That would result
|
||||
// in the container losing any exclusively-allocated CPUs that it
|
||||
// was allocated.
|
||||
_, _, err := m.containerMap.GetContainerRef(containerID)
|
||||
if err == nil {
|
||||
klog.V(4).InfoS("ReconcileState: ignoring terminated container", "pod", klog.KObj(pod), "containerID", containerID)
|
||||
}
|
||||
m.Unlock()
|
||||
continue
|
||||
}
|
||||
|
||||
// Once we make it here we know we have a running container.
|
||||
// Idempotently add it to the containerMap in case it is missing.
|
||||
// This can happen after a kubelet restart, for example.
|
||||
m.containerMap.Add(string(pod.UID), container.Name, containerID)
|
||||
m.Unlock()
|
||||
|
||||
cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
|
||||
if cset.IsEmpty() {
|
||||
// NOTE: This should not happen outside of tests.
|
||||
klog.V(2).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
continue
|
||||
}
|
||||
|
||||
lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name)
|
||||
if !cset.Equals(lcset) {
|
||||
klog.V(5).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
|
||||
err = m.updateContainerCPUSet(ctx, containerID, cset)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
continue
|
||||
}
|
||||
m.lastUpdateState.SetCPUSet(string(pod.UID), container.Name, cset)
|
||||
}
|
||||
success = append(success, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
}
|
||||
}
|
||||
return success, failure
|
||||
}
|
||||
|
||||
func findContainerIDByName(status *v1.PodStatus, name string) (string, error) {
|
||||
allStatuses := status.InitContainerStatuses
|
||||
allStatuses = append(allStatuses, status.ContainerStatuses...)
|
||||
for _, container := range allStatuses {
|
||||
if container.Name == name && container.ContainerID != "" {
|
||||
cid := &kubecontainer.ContainerID{}
|
||||
err := cid.ParseString(container.ContainerID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return cid.ID, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("unable to find ID for container with name %v in pod status (it may not be running)", name)
|
||||
}
|
||||
|
||||
func findContainerStatusByName(status *v1.PodStatus, name string) (*v1.ContainerStatus, error) {
|
||||
for _, containerStatus := range append(status.InitContainerStatuses, status.ContainerStatuses...) {
|
||||
if containerStatus.Name == name {
|
||||
return &containerStatus, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("unable to find status for container with name %v in pod status (it may not be running)", name)
|
||||
}
|
||||
|
||||
func (m *manager) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
|
||||
if result, ok := m.state.GetCPUSet(podUID, containerName); ok {
|
||||
return result
|
||||
}
|
||||
|
||||
return cpuset.CPUSet{}
|
||||
}
|
||||
|
||||
func (m *manager) GetCPUAffinity(podUID, containerName string) cpuset.CPUSet {
|
||||
return m.state.GetCPUSetOrDefault(podUID, containerName)
|
||||
}
|
43
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/cpu_manager_others.go
generated
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
func (m *manager) updateContainerCPUSet(ctx context.Context, containerID string, cpus cpuset.CPUSet) error {
|
||||
// TODO: Consider adding a `ResourceConfigForContainer` helper in
|
||||
// helpers_linux.go similar to what exists for pods.
|
||||
// It would be better to pass the full container resources here instead of
|
||||
// this patch-like partial resources.
|
||||
|
||||
return m.containerRuntime.UpdateContainerResources(
|
||||
ctx,
|
||||
containerID,
|
||||
&runtimeapi.ContainerResources{
|
||||
Linux: &runtimeapi.LinuxContainerResources{
|
||||
CpusetCpus: cpus.String(),
|
||||
},
|
||||
})
|
||||
}
|
49
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/cpu_manager_windows.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/winstats"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
func (m *manager) updateContainerCPUSet(ctx context.Context, containerID string, cpus cpuset.CPUSet) error {
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
|
||||
return nil
|
||||
}
|
||||
|
||||
affinities := winstats.CpusToGroupAffinity(cpus.List())
|
||||
var cpuGroupAffinities []*runtimeapi.WindowsCpuGroupAffinity
|
||||
for _, affinity := range affinities {
|
||||
cpuGroupAffinities = append(cpuGroupAffinities, &runtimeapi.WindowsCpuGroupAffinity{
|
||||
CpuGroup: uint32(affinity.Group),
|
||||
CpuMask: uint64(affinity.Mask),
|
||||
})
|
||||
}
|
||||
return m.containerRuntime.UpdateContainerResources(ctx, containerID, &runtimeapi.ContainerResources{
|
||||
Windows: &runtimeapi.WindowsContainerResources{
|
||||
AffinityCpus: cpuGroupAffinities,
|
||||
},
|
||||
})
|
||||
}
|
98
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
generated
vendored
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
type fakeManager struct {
|
||||
state state.State
|
||||
}
|
||||
|
||||
func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
|
||||
klog.InfoS("Start()")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *fakeManager) Policy() Policy {
|
||||
klog.InfoS("Policy()")
|
||||
pol, _ := NewNonePolicy(nil)
|
||||
return pol
|
||||
}
|
||||
|
||||
func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
|
||||
klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
|
||||
klog.InfoS("AddContainer", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID)
|
||||
}
|
||||
|
||||
func (m *fakeManager) RemoveContainer(containerID string) error {
|
||||
klog.InfoS("RemoveContainer", "containerID", containerID)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
klog.InfoS("Get container topology hints")
|
||||
return map[string][]topologymanager.TopologyHint{}
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
klog.InfoS("Get pod topology hints")
|
||||
return map[string][]topologymanager.TopologyHint{}
|
||||
}
|
||||
|
||||
func (m *fakeManager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
|
||||
klog.InfoS("GetExclusiveCPUs", "podUID", podUID, "containerName", containerName)
|
||||
return cpuset.CPUSet{}
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
|
||||
klog.InfoS("Get Allocatable CPUs")
|
||||
return cpuset.CPUSet{}
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetCPUAffinity(podUID, containerName string) cpuset.CPUSet {
|
||||
klog.InfoS("GetCPUAffinity", "podUID", podUID, "containerName", containerName)
|
||||
return cpuset.CPUSet{}
|
||||
}
|
||||
|
||||
func (m *fakeManager) GetAllCPUs() cpuset.CPUSet {
|
||||
klog.InfoS("GetAllCPUs")
|
||||
return cpuset.CPUSet{}
|
||||
}
|
||||
|
||||
// NewFakeManager creates an empty/fake cpu manager
|
||||
func NewFakeManager() Manager {
|
||||
return &fakeManager{
|
||||
state: state.NewMemoryState(),
|
||||
}
|
||||
}
|
45
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/policy.go
generated
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
// Policy implements logic for pod container to CPU assignment.
|
||||
type Policy interface {
|
||||
Name() string
|
||||
Start(s state.State) error
|
||||
// Allocate call is idempotent
|
||||
Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
|
||||
// RemoveContainer call is idempotent
|
||||
RemoveContainer(s state.State, podUID string, containerName string) error
|
||||
// GetTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment among this
|
||||
// and other resource controllers.
|
||||
GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint
|
||||
// GetPodTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment per Pod
|
||||
// among this and other resource controllers.
|
||||
GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint
|
||||
// GetAllocatableCPUs returns the total set of CPUs available for allocation.
|
||||
GetAllocatableCPUs(m state.State) cpuset.CPUSet
|
||||
}
|
76
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/policy_none.go
generated
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
type nonePolicy struct{}
|
||||
|
||||
var _ Policy = &nonePolicy{}
|
||||
|
||||
// PolicyNone is the name of the none policy
|
||||
const PolicyNone policyName = "none"
|
||||
|
||||
// NewNonePolicy returns a cpuset manager policy that does nothing
|
||||
func NewNonePolicy(cpuPolicyOptions map[string]string) (Policy, error) {
|
||||
if len(cpuPolicyOptions) > 0 {
|
||||
return nil, fmt.Errorf("None policy: received unsupported options=%v", cpuPolicyOptions)
|
||||
}
|
||||
return &nonePolicy{}, nil
|
||||
}
|
||||
|
||||
func (p *nonePolicy) Name() string {
|
||||
return string(PolicyNone)
|
||||
}
|
||||
|
||||
func (p *nonePolicy) Start(s state.State) error {
|
||||
klog.InfoS("None policy: Start")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *nonePolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *nonePolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *nonePolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *nonePolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Assignable CPUs are the ones that can be exclusively allocated to pods that meet the exclusivity requirement
|
||||
// (ie guaranteed QoS class and integral CPU request).
|
||||
// Assignability of CPUs as a concept is only applicable in case of static policy i.e. scenarios where workloads
|
||||
// CAN get exclusive access to core(s).
|
||||
// Hence, we return an empty set here: no cpus are assignable according to the above definition with this policy.
|
||||
func (p *nonePolicy) GetAllocatableCPUs(m state.State) cpuset.CPUSet {
|
||||
return cpuset.New()
|
||||
}
|
185
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/policy_options.go
generated
vendored
Normal file
@ -0,0 +1,185 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
)
|
||||
|
||||
// Names of the options, as part of the user interface.
|
||||
const (
|
||||
FullPCPUsOnlyOption string = "full-pcpus-only"
|
||||
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
|
||||
AlignBySocketOption string = "align-by-socket"
|
||||
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
|
||||
StrictCPUReservationOption string = "strict-cpu-reservation"
|
||||
PreferAlignByUnCoreCacheOption string = "prefer-align-cpus-by-uncorecache"
|
||||
)
|
||||
|
||||
var (
|
||||
alphaOptions = sets.New[string](
|
||||
DistributeCPUsAcrossNUMAOption,
|
||||
AlignBySocketOption,
|
||||
DistributeCPUsAcrossCoresOption,
|
||||
StrictCPUReservationOption,
|
||||
PreferAlignByUnCoreCacheOption,
|
||||
)
|
||||
betaOptions = sets.New[string](
|
||||
FullPCPUsOnlyOption,
|
||||
)
|
||||
stableOptions = sets.New[string]()
|
||||
)
|
||||
|
||||
// CheckPolicyOptionAvailable verifies if the given option can be used depending on the Feature Gate Settings.
|
||||
// returns nil on success, or an error describing the failure on error.
|
||||
func CheckPolicyOptionAvailable(option string) error {
|
||||
if !alphaOptions.Has(option) && !betaOptions.Has(option) && !stableOptions.Has(option) {
|
||||
return fmt.Errorf("unknown CPU Manager Policy option: %q", option)
|
||||
}
|
||||
|
||||
if alphaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManagerPolicyAlphaOptions) {
|
||||
return fmt.Errorf("CPU Manager Policy Alpha-level Options not enabled, but option %q provided", option)
|
||||
}
|
||||
|
||||
if betaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManagerPolicyBetaOptions) {
|
||||
return fmt.Errorf("CPU Manager Policy Beta-level Options not enabled, but option %q provided", option)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// StaticPolicyOptions holds the parsed value of the policy options, ready to be consumed internally.
|
||||
type StaticPolicyOptions struct {
|
||||
// flag to enable extra allocation restrictions to avoid
|
||||
// different containers possibly ending up on the same core.
|
||||
// we consider "core" and "physical CPU" synonim here, leaning
|
||||
// towards the terminology the k8s docs hint at. We acknowledge this is confusing.
|
||||
//
|
||||
// looking at https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/,
|
||||
// any possible naming scheme will lead to ambiguity to some extent.
|
||||
// We picked "pcpu" because it the established docs hints at vCPU already.
|
||||
FullPhysicalCPUsOnly bool
|
||||
// Flag to evenly distribute CPUs across NUMA nodes in cases where more
|
||||
// than one NUMA node is required to satisfy the allocation.
|
||||
DistributeCPUsAcrossNUMA bool
|
||||
// Flag to ensure CPUs are considered aligned at socket boundary rather than
|
||||
// NUMA boundary
|
||||
AlignBySocket bool
|
||||
// flag to enable extra allocation restrictions to spread
|
||||
// cpus (HT) across different physical cores.
|
||||
// This is a preference rather than a requirement, so no error is thrown if they have to be packed onto one physical core.
|
||||
DistributeCPUsAcrossCores bool
|
||||
// Flag to remove reserved cores from the list of available cores
|
||||
StrictCPUReservation bool
|
||||
// Flag that makes a best-effort attempt to align CPUs to an uncore cache boundary.
|
||||
// As long as there are CPUs available, pods will be admitted if the condition is not met.
|
||||
PreferAlignByUncoreCacheOption bool
|
||||
}
|
||||
|
||||
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
|
||||
func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOptions, error) {
|
||||
opts := StaticPolicyOptions{}
|
||||
for name, value := range policyOptions {
|
||||
if err := CheckPolicyOptionAvailable(name); err != nil {
|
||||
return opts, err
|
||||
}
|
||||
|
||||
switch name {
|
||||
case FullPCPUsOnlyOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.FullPhysicalCPUsOnly = optValue
|
||||
case DistributeCPUsAcrossNUMAOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.DistributeCPUsAcrossNUMA = optValue
|
||||
case AlignBySocketOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.AlignBySocket = optValue
|
||||
case DistributeCPUsAcrossCoresOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.DistributeCPUsAcrossCores = optValue
|
||||
case StrictCPUReservationOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.StrictCPUReservation = optValue
|
||||
case PreferAlignByUnCoreCacheOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.PreferAlignByUncoreCacheOption = optValue
|
||||
default:
|
||||
// this should never be reached, we already detect unknown options,
|
||||
// but we keep it as further safety.
|
||||
return opts, fmt.Errorf("unsupported cpumanager option: %q (%s)", name, value)
|
||||
}
|
||||
}
|
||||
|
||||
if opts.FullPhysicalCPUsOnly && opts.DistributeCPUsAcrossCores {
|
||||
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", FullPCPUsOnlyOption, DistributeCPUsAcrossCoresOption)
|
||||
}
|
||||
|
||||
// TODO(@Jeffwan): Remove this check after more compatibility tests are done.
|
||||
if opts.DistributeCPUsAcrossNUMA && opts.DistributeCPUsAcrossCores {
|
||||
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", DistributeCPUsAcrossNUMAOption, DistributeCPUsAcrossCoresOption)
|
||||
}
|
||||
|
||||
if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossCores {
|
||||
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossCoresOption)
|
||||
}
|
||||
|
||||
if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossNUMA {
|
||||
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossNUMAOption)
|
||||
}
|
||||
|
||||
return opts, nil
|
||||
}
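A minimal, standalone sketch (not part of the vendored file) of the map[string]string shape this constructor consumes and how each value is parsed; the feature-gate availability checks are deliberately omitted, and the option values shown are only an example:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// Hypothetical cpuManagerPolicyOptions from a kubelet configuration,
	// already flattened into the map form the policy receives.
	options := map[string]string{
		"full-pcpus-only":             "true",
		"distribute-cpus-across-numa": "false",
	}
	for name, value := range options {
		parsed, err := strconv.ParseBool(value)
		if err != nil {
			fmt.Printf("bad value for option %q: %v\n", name, err)
			continue
		}
		fmt.Printf("option %s => %t\n", name, parsed)
	}
}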
|
||||
|
||||
// ValidateStaticPolicyOptions ensures that the requested policy options are compatible with the machine on which the CPUManager is running.
|
||||
func ValidateStaticPolicyOptions(opts StaticPolicyOptions, topology *topology.CPUTopology, topologyManager topologymanager.Store) error {
|
||||
if opts.AlignBySocket {
|
||||
// Not compatible with topology manager single-numa-node policy option.
|
||||
if topologyManager.GetPolicy().Name() == topologymanager.PolicySingleNumaNode {
|
||||
return fmt.Errorf("Topolgy manager %s policy is incompatible with CPUManager %s policy option", topologymanager.PolicySingleNumaNode, AlignBySocketOption)
|
||||
}
|
||||
// Not compatible with topology when number of sockets are more than number of NUMA nodes.
|
||||
if topology.NumSockets > topology.NumNUMANodes {
|
||||
return fmt.Errorf("Align by socket is not compatible with hardware where number of sockets are more than number of NUMA")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
766
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/policy_static.go
generated
vendored
Normal file
@ -0,0 +1,766 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
// PolicyStatic is the name of the static policy.
|
||||
// Should options be given, these will be ignored and backward (up to 1.21 included)
|
||||
// compatible behaviour will be enforced
|
||||
PolicyStatic policyName = "static"
|
||||
// ErrorSMTAlignment represents the type of an SMTAlignmentError
|
||||
ErrorSMTAlignment = "SMTAlignmentError"
|
||||
)
|
||||
|
||||
// SMTAlignmentError represents an error due to SMT alignment
|
||||
type SMTAlignmentError struct {
|
||||
RequestedCPUs int
|
||||
CpusPerCore int
|
||||
AvailablePhysicalCPUs int
|
||||
CausedByPhysicalCPUs bool
|
||||
}
|
||||
|
||||
func (e SMTAlignmentError) Error() string {
|
||||
if e.CausedByPhysicalCPUs {
|
||||
return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore)
|
||||
}
|
||||
return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore)
|
||||
}
|
||||
|
||||
// Type returns human-readable type of this error. Used in the admission control to populate Admission Failure reason.
|
||||
func (e SMTAlignmentError) Type() string {
|
||||
return ErrorSMTAlignment
|
||||
}
|
||||
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
//
|
||||
// This policy allocates CPUs exclusively for a container if all the following
|
||||
// conditions are met:
|
||||
//
|
||||
// - The pod QoS class is Guaranteed.
|
||||
// - The CPU request is a positive integer.
|
||||
//
|
||||
// The static policy maintains the following sets of logical CPUs:
|
||||
//
|
||||
// - SHARED: Burstable, BestEffort, and non-integral Guaranteed containers
|
||||
// run here. Initially this contains all CPU IDs on the system. As
|
||||
// exclusive allocations are created and destroyed, this CPU set shrinks
|
||||
// and grows, accordingly. This is stored in the state as the default
|
||||
// CPU set.
|
||||
//
|
||||
// - RESERVED: A subset of the shared pool which is not exclusively
|
||||
// allocatable. The membership of this pool is static for the lifetime of
|
||||
// the Kubelet. The size of the reserved pool is
|
||||
// ceil(systemreserved.cpu + kubereserved.cpu).
|
||||
// Reserved CPUs are taken topologically starting with lowest-indexed
|
||||
// physical core, as reported by cAdvisor.
|
||||
//
|
||||
// - ASSIGNABLE: Equal to SHARED - RESERVED. Exclusive CPUs are allocated
|
||||
// from this pool.
|
||||
//
|
||||
// - EXCLUSIVE ALLOCATIONS: CPU sets assigned exclusively to one container.
|
||||
// These are stored as explicit assignments in the state.
|
||||
//
|
||||
// When an exclusive allocation is made, the static policy also updates the
|
||||
// default cpuset in the state abstraction. The CPU manager's periodic
|
||||
// reconcile loop takes care of rewriting the cpuset in cgroupfs for any
|
||||
// containers that may be running in the shared pool. For this reason,
|
||||
// applications running within exclusively-allocated containers must tolerate
|
||||
// potentially sharing their allocated CPUs for up to the CPU manager
|
||||
// reconcile period.
|
||||
type staticPolicy struct {
|
||||
// cpu socket topology
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reservedCPUs cpuset.CPUSet
|
||||
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
||||
// but also any siblings of those reservedCPUs on the same physical die.
|
||||
// NOTE: If the reserved set includes full physical CPUs from the beginning
|
||||
// (e.g. only reserved pairs of core siblings) this set is expected to be
|
||||
// identical to the reserved set.
|
||||
reservedPhysicalCPUs cpuset.CPUSet
|
||||
// topology manager reference to get container Topology affinity
|
||||
affinity topologymanager.Store
|
||||
// set of CPUs to reuse across allocations in a pod
|
||||
cpusToReuse map[string]cpuset.CPUSet
|
||||
// options allow to fine-tune the behaviour of the policy
|
||||
options StaticPolicyOptions
|
||||
// we compute this value multiple times, and it's not supposed to change
|
||||
// at runtime - the cpumanager can't deal with runtime topology changes anyway.
|
||||
cpuGroupSize int
|
||||
}
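To make the allocation conditions described in the comment above concrete, here is a hedged, illustrative sketch (not part of the vendored file) of a container spec that qualifies for exclusive CPUs: requests equal limits for every resource (Guaranteed QoS) and the CPU request is a whole number.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "pinned", // hypothetical container name
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2"), // integral request -> eligible
						v1.ResourceMemory: resource.MustParse("1Gi"),
					},
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2"),
						v1.ResourceMemory: resource.MustParse("1Gi"),
					},
				},
			}},
		},
	}
	fmt.Println("requested CPUs:", pod.Spec.Containers[0].Resources.Requests.Cpu().String())
}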
|
||||
|
||||
// Ensure staticPolicy implements Policy interface
|
||||
var _ Policy = &staticPolicy{}
|
||||
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
|
||||
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = ValidateStaticPolicyOptions(opts, topology, affinity)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cpuGroupSize := topology.CPUsPerCore()
|
||||
klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize)
|
||||
|
||||
policy := &staticPolicy{
|
||||
topology: topology,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
options: opts,
|
||||
cpuGroupSize: cpuGroupSize,
|
||||
}
|
||||
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
reserved = reservedCPUs
|
||||
} else {
|
||||
// takeByTopology allocates CPUs associated with low-numbered cores from
|
||||
// allCPUs.
|
||||
//
|
||||
// For example: Given a system with 8 CPUs available and HT enabled,
|
||||
// if numReservedCPUs=2, then reserved={0,4}
|
||||
reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs)
|
||||
}
|
||||
|
||||
if reserved.Size() != numReservedCPUs {
|
||||
err := fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var reservedPhysicalCPUs cpuset.CPUSet
|
||||
for _, cpu := range reserved.UnsortedList() {
|
||||
core, err := topology.CPUCoreID(cpu)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[cpumanager] unable to build the reserved physical CPUs from the reserved set: %w", err)
|
||||
}
|
||||
reservedPhysicalCPUs = reservedPhysicalCPUs.Union(topology.CPUDetails.CPUsInCores(core))
|
||||
}
|
||||
|
||||
klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved, "reservedPhysicalCPUs", reservedPhysicalCPUs)
|
||||
policy.reservedCPUs = reserved
|
||||
policy.reservedPhysicalCPUs = reservedPhysicalCPUs
|
||||
|
||||
return policy, nil
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Name() string {
|
||||
return string(PolicyStatic)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Start(s state.State) error {
|
||||
if err := p.validateState(s); err != nil {
|
||||
klog.ErrorS(err, "Static policy invalid state, please drain node and remove policy state file")
|
||||
return err
|
||||
}
|
||||
p.initializeMetrics(s)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *staticPolicy) validateState(s state.State) error {
|
||||
tmpAssignments := s.GetCPUAssignments()
|
||||
tmpDefaultCPUset := s.GetDefaultCPUSet()
|
||||
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
if p.options.StrictCPUReservation {
|
||||
allCPUs = allCPUs.Difference(p.reservedCPUs)
|
||||
}
|
||||
|
||||
// Default cpuset cannot be empty when assignments exist
|
||||
if tmpDefaultCPUset.IsEmpty() {
|
||||
if len(tmpAssignments) != 0 {
|
||||
return fmt.Errorf("default cpuset cannot be empty")
|
||||
}
|
||||
// state is empty initialize
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// State has already been initialized from file (is not empty)
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
if p.options.StrictCPUReservation {
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
|
||||
return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
} else {
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Check if state for static policy is consistent
|
||||
for pod := range tmpAssignments {
|
||||
for container, cset := range tmpAssignments[pod] {
|
||||
// None of the cpu in DEFAULT cset should be in s.assignments
|
||||
if !tmpDefaultCPUset.Intersection(cset).IsEmpty() {
|
||||
return fmt.Errorf("pod: %s, container: %s cpuset: \"%s\" overlaps with default cpuset \"%s\"",
|
||||
pod, container, cset.String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. It's possible that the set of available CPUs has changed since
|
||||
// the state was written. This can be due to for example
|
||||
// offlining a CPU when kubelet is not running. If this happens,
|
||||
// CPU manager will run into trouble when later it tries to
|
||||
// assign non-existent CPUs to containers. Validate that the
|
||||
// topology that was received during CPU manager startup matches with
|
||||
// the set of CPUs stored in the state.
|
||||
totalKnownCPUs := tmpDefaultCPUset.Clone()
|
||||
tmpCPUSets := []cpuset.CPUSet{}
|
||||
for pod := range tmpAssignments {
|
||||
for _, cset := range tmpAssignments[pod] {
|
||||
tmpCPUSets = append(tmpCPUSets, cset)
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
|
||||
if !totalKnownCPUs.Equals(allCPUs) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
allCPUs.String(), totalKnownCPUs.String())
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllocatableCPUs returns the total set of CPUs available for allocation.
|
||||
func (p *staticPolicy) GetAllocatableCPUs(s state.State) cpuset.CPUSet {
|
||||
return p.topology.CPUDetails.CPUs().Difference(p.reservedCPUs)
|
||||
}
|
||||
|
||||
// GetAvailableCPUs returns the set of unassigned CPUs minus the reserved set.
|
||||
func (p *staticPolicy) GetAvailableCPUs(s state.State) cpuset.CPUSet {
|
||||
return s.GetDefaultCPUSet().Difference(p.reservedCPUs)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) GetAvailablePhysicalCPUs(s state.State) cpuset.CPUSet {
|
||||
return s.GetDefaultCPUSet().Difference(p.reservedPhysicalCPUs)
|
||||
}
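A small sketch (not part of the vendored file) of the set relations used by the three accessors above, with k8s.io/utils/cpuset and made-up CPU IDs: the available pool is the default (shared) pool minus the reserved CPUs.

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	defaultPool := cpuset.New(0, 1, 2, 3, 4, 5, 6, 7) // hypothetical default cpuset
	reserved := cpuset.New(0, 4)                      // hypothetical reserved cpuset
	available := defaultPool.Difference(reserved)
	fmt.Println(available.String()) // "1-3,5-7"
}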
|
||||
|
||||
func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, cset cpuset.CPUSet) {
|
||||
// If entries exist in p.cpusToReuse for pods other than the current pod, delete them.
|
||||
for podUID := range p.cpusToReuse {
|
||||
if podUID != string(pod.UID) {
|
||||
delete(p.cpusToReuse, podUID)
|
||||
}
|
||||
}
|
||||
// If no cpuset exists for cpusToReuse by this pod yet, create one.
|
||||
if _, ok := p.cpusToReuse[string(pod.UID)]; !ok {
|
||||
p.cpusToReuse[string(pod.UID)] = cpuset.New()
|
||||
}
|
||||
// Check if the container is an init container.
|
||||
// If so, add its cpuset to the cpuset of reusable CPUs for any new allocations.
|
||||
for _, initContainer := range pod.Spec.InitContainers {
|
||||
if container.Name == initContainer.Name {
|
||||
if podutil.IsRestartableInitContainer(&initContainer) {
|
||||
// If the container is a restartable init container, we should not
|
||||
// reuse its cpuset, as a restartable init container can run with
|
||||
// regular containers.
|
||||
break
|
||||
}
|
||||
p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Union(cset)
|
||||
return
|
||||
}
|
||||
}
|
||||
// Otherwise it is an app container.
|
||||
// Remove its cpuset from the cpuset of reusable CPUs for any new allocations.
|
||||
p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
||||
numCPUs := p.guaranteedCPUs(pod, container)
|
||||
if numCPUs == 0 {
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
|
||||
klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
metrics.CPUManagerPinningRequestsTotal.Inc()
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
metrics.CPUManagerPinningErrorsTotal.Inc()
|
||||
return
|
||||
}
|
||||
if !p.options.FullPhysicalCPUsOnly {
|
||||
// increment only if we know we allocate aligned resources
|
||||
return
|
||||
}
|
||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
|
||||
}()
|
||||
|
||||
if p.options.FullPhysicalCPUsOnly {
|
||||
if (numCPUs % p.cpuGroupSize) != 0 {
|
||||
// Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted
|
||||
// if the CPU requested is a multiple of the number of virtual cpus per physical cores.
|
||||
// In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put
|
||||
// in Failed state, with SMTAlignmentError as reason. Since the allocation happens in terms of physical cores
|
||||
// and the scheduler is responsible for ensuring that the workload goes to a node that has enough CPUs,
|
||||
// the pod would be placed on a node where there are enough physical cores available to be allocated.
|
||||
// Just like the behaviour in case of static policy, takeByTopology will try to first allocate CPUs from the same socket
|
||||
// and only in case the request cannot be satisfied on a single socket, CPU allocation is done for a workload to occupy all
|
||||
// CPUs on a physical core. Allocation of individual threads would never have to occur.
|
||||
return SMTAlignmentError{
|
||||
RequestedCPUs: numCPUs,
|
||||
CpusPerCore: p.cpuGroupSize,
|
||||
CausedByPhysicalCPUs: false,
|
||||
}
|
||||
}
|
||||
|
||||
availablePhysicalCPUs := p.GetAvailablePhysicalCPUs(s).Size()
|
||||
|
||||
// It's legal to reserve CPUs which are not core siblings. In this case the CPU allocator can descend to single cores
|
||||
// when picking CPUs. This will void the guarantee of FullPhysicalCPUsOnly. To prevent this, we need to additionally consider
|
||||
// all the core siblings of the reserved CPUs as unavailable when computing the free CPUs, before starting the actual allocation.
|
||||
// This way, by construction, all possible CPU allocations whose size is a multiple of the SMT level are correct again.
|
||||
if numCPUs > availablePhysicalCPUs {
|
||||
return SMTAlignmentError{
|
||||
RequestedCPUs: numCPUs,
|
||||
CpusPerCore: p.cpuGroupSize,
|
||||
AvailablePhysicalCPUs: availablePhysicalCPUs,
|
||||
CausedByPhysicalCPUs: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Call Topology Manager to get the aligned socket affinity across all hint providers.
|
||||
hint := p.affinity.GetAffinity(string(pod.UID), container.Name)
|
||||
klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint)
|
||||
|
||||
// Allocate CPUs according to the NUMA affinity contained in the hint.
|
||||
cpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)])
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to allocate CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs)
|
||||
return err
|
||||
}
|
||||
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
p.updateMetricsOnAllocate(cpuset)
|
||||
|
||||
return nil
|
||||
}
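A minimal arithmetic sketch (not part of the vendored file) of the full-pcpus-only admission rule applied in Allocate above: with SMT enabled and 2 hardware threads per physical core (cpuGroupSize = 2), a request for 3 exclusive CPUs fails SMT alignment while a request for 4 is admitted.

package main

import "fmt"

func main() {
	cpuGroupSize := 2 // hardware threads per physical core (hypothetical topology)
	for _, requested := range []int{3, 4} {
		if requested%cpuGroupSize != 0 {
			fmt.Printf("request of %d CPUs: rejected, not a multiple of %d\n", requested, cpuGroupSize)
			continue
		}
		fmt.Printf("request of %d CPUs: admitted\n", requested)
	}
}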
|
||||
|
||||
// getAssignedCPUsOfSiblings returns assigned cpus of the given container's siblings (all containers other than the given container) in the given pod `podUID`.
|
||||
func getAssignedCPUsOfSiblings(s state.State, podUID string, containerName string) cpuset.CPUSet {
|
||||
assignments := s.GetCPUAssignments()
|
||||
cset := cpuset.New()
|
||||
for name, cpus := range assignments[podUID] {
|
||||
if containerName == name {
|
||||
continue
|
||||
}
|
||||
cset = cset.Union(cpus)
|
||||
}
|
||||
return cset
|
||||
}
|
||||
|
||||
func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
|
||||
klog.InfoS("Static policy: RemoveContainer", "podUID", podUID, "containerName", containerName)
|
||||
cpusInUse := getAssignedCPUsOfSiblings(s, podUID, containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
toRelease = toRelease.Difference(cpusInUse)
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
p.updateMetricsOnRelease(toRelease)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (cpuset.CPUSet, error) {
|
||||
klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity)
|
||||
|
||||
allocatableCPUs := p.GetAvailableCPUs(s).Union(reusableCPUs)
|
||||
|
||||
// If there are aligned CPUs in numaAffinity, attempt to take those first.
|
||||
result := cpuset.New()
|
||||
if numaAffinity != nil {
|
||||
alignedCPUs := p.getAlignedCPUs(numaAffinity, allocatableCPUs)
|
||||
|
||||
numAlignedToAlloc := alignedCPUs.Size()
|
||||
if numCPUs < numAlignedToAlloc {
|
||||
numAlignedToAlloc = numCPUs
|
||||
}
|
||||
|
||||
alignedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc)
|
||||
if err != nil {
|
||||
return cpuset.New(), err
|
||||
}
|
||||
|
||||
result = result.Union(alignedCPUs)
|
||||
}
|
||||
|
||||
// Get any remaining CPUs from what's leftover after attempting to grab aligned ones.
|
||||
remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result), numCPUs-result.Size())
|
||||
if err != nil {
|
||||
return cpuset.New(), err
|
||||
}
|
||||
result = result.Union(remainingCPUs)
|
||||
|
||||
// Remove allocated CPUs from the shared CPUSet.
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result))
|
||||
|
||||
klog.InfoS("AllocateCPUs", "result", result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int {
|
||||
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
|
||||
return 0
|
||||
}
|
||||
cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
|
||||
// In-place pod resize feature makes Container.Resources field mutable for CPU & memory.
|
||||
// AllocatedResources holds the value of Container.Resources.Requests when the pod was admitted.
|
||||
// We should return this value because this is what kubelet agreed to allocate for the container
|
||||
// and the value configured with runtime.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
|
||||
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
|
||||
cpuQuantity = cs.AllocatedResources[v1.ResourceCPU]
|
||||
}
|
||||
}
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
return int(cpuQuantity.Value())
|
||||
}
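A hedged sketch (not part of the vendored file) of the integral-request check above, using resource quantities: a fractional request such as 500m falls back to the shared pool (0), while a whole-CPU request returns its value.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// guaranteed mirrors only the integral-value check from guaranteedCPUs.
func guaranteed(q resource.Quantity) int {
	if q.Value()*1000 != q.MilliValue() {
		return 0 // fractional request, e.g. 500m
	}
	return int(q.Value())
}

func main() {
	fmt.Println(guaranteed(resource.MustParse("500m"))) // 0
	fmt.Println(guaranteed(resource.MustParse("2")))    // 2
}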
|
||||
|
||||
func (p *staticPolicy) podGuaranteedCPUs(pod *v1.Pod) int {
|
||||
// The maximum of requested CPUs by init containers.
|
||||
requestedByInitContainers := 0
|
||||
requestedByRestartableInitContainers := 0
|
||||
for _, container := range pod.Spec.InitContainers {
|
||||
if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok {
|
||||
continue
|
||||
}
|
||||
requestedCPU := p.guaranteedCPUs(pod, &container)
|
||||
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#resources-calculation-for-scheduling-and-pod-admission
|
||||
// for the detail.
|
||||
if podutil.IsRestartableInitContainer(&container) {
|
||||
requestedByRestartableInitContainers += requestedCPU
|
||||
} else if requestedByRestartableInitContainers+requestedCPU > requestedByInitContainers {
|
||||
requestedByInitContainers = requestedByRestartableInitContainers + requestedCPU
|
||||
}
|
||||
}
|
||||
|
||||
// The sum of requested CPUs by app containers.
|
||||
requestedByAppContainers := 0
|
||||
for _, container := range pod.Spec.Containers {
|
||||
if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok {
|
||||
continue
|
||||
}
|
||||
requestedByAppContainers += p.guaranteedCPUs(pod, &container)
|
||||
}
|
||||
|
||||
requestedByLongRunningContainers := requestedByAppContainers + requestedByRestartableInitContainers
|
||||
if requestedByInitContainers > requestedByLongRunningContainers {
|
||||
return requestedByInitContainers
|
||||
}
|
||||
return requestedByLongRunningContainers
|
||||
}
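A worked example (not part of the vendored file, numbers are made up) of the pod-level accounting above: one regular init container requesting 4 CPUs, one restartable (sidecar) init container requesting 1 CPU, and two app containers requesting 2 CPUs each; the pod-level value is max(init peak, app sum + sidecars) = max(4, 5) = 5.

package main

import "fmt"

func main() {
	initPeak := 0 // maximum seen for regular init containers (plus sidecars started before them)
	sidecars := 0 // running total for restartable init containers
	initContainers := []struct {
		cpus        int
		restartable bool
	}{
		{cpus: 4, restartable: false}, // plain init container
		{cpus: 1, restartable: true},  // sidecar container
	}
	for _, c := range initContainers {
		if c.restartable {
			sidecars += c.cpus
		} else if sidecars+c.cpus > initPeak {
			initPeak = sidecars + c.cpus
		}
	}
	appSum := 2 + 2 // two app containers
	longRunning := appSum + sidecars
	if initPeak > longRunning {
		fmt.Println("pod guaranteed CPUs:", initPeak)
		return
	}
	fmt.Println("pod guaranteed CPUs:", longRunning) // 5
}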
|
||||
|
||||
func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
|
||||
cpuSortingStrategy := CPUSortingStrategyPacked
|
||||
if p.options.DistributeCPUsAcrossCores {
|
||||
cpuSortingStrategy = CPUSortingStrategySpread
|
||||
}
|
||||
|
||||
if p.options.DistributeCPUsAcrossNUMA {
|
||||
cpuGroupSize := 1
|
||||
if p.options.FullPhysicalCPUsOnly {
|
||||
cpuGroupSize = p.cpuGroupSize
|
||||
}
|
||||
return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy)
|
||||
}
|
||||
|
||||
return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
// Get a count of how many guaranteed CPUs have been requested.
|
||||
requested := p.guaranteedCPUs(pod, container)
|
||||
|
||||
// Number of required CPUs is not an integer or a container is not part of the Guaranteed QoS class.
|
||||
// It will be treated by the TopologyManager as having no preference and cause it to ignore this
|
||||
// resource when considering pod alignment.
|
||||
// In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true].
|
||||
if requested == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Short circuit to regenerate the same hints if there are already
|
||||
// guaranteed CPUs allocated to the Container. This might happen after a
|
||||
// kubelet restart, for example.
|
||||
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
|
||||
if allocated.Size() != requested {
|
||||
klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
|
||||
// An empty list of hints will be treated as a preference that cannot be satisfied.
|
||||
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
|
||||
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): {},
|
||||
}
|
||||
}
|
||||
klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of available CPUs.
|
||||
available := p.GetAvailableCPUs(s)
|
||||
|
||||
// Get a list of reusable CPUs (e.g. CPUs reused from initContainers).
|
||||
// It should be an empty CPUSet for a newly created pod.
|
||||
reusable := p.cpusToReuse[string(pod.UID)]
|
||||
|
||||
// Generate hints.
|
||||
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
|
||||
klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints)
|
||||
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): cpuHints,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
// Get a count of how many guaranteed CPUs have been requested by Pod.
|
||||
requested := p.podGuaranteedCPUs(pod)
|
||||
|
||||
// Number of required CPUs is not an integer or a pod is not part of the Guaranteed QoS class.
|
||||
// It will be treated by the TopologyManager as having no preference and cause it to ignore this
|
||||
// resource when considering pod alignment.
|
||||
// In terms of hints, this is equal to: TopologyHints[NUMANodeAffinity: nil, Preferred: true].
|
||||
if requested == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
assignedCPUs := cpuset.New()
|
||||
for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
|
||||
requestedByContainer := p.guaranteedCPUs(pod, &container)
|
||||
// Short circuit to regenerate the same hints if there are already
|
||||
// guaranteed CPUs allocated to the Container. This might happen after a
|
||||
// kubelet restart, for example.
|
||||
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
|
||||
if allocated.Size() != requestedByContainer {
|
||||
klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
|
||||
// An empty list of hints will be treated as a preference that cannot be satisfied.
|
||||
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
|
||||
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): {},
|
||||
}
|
||||
}
|
||||
// A set of CPUs already assigned to containers in this pod
|
||||
assignedCPUs = assignedCPUs.Union(allocated)
|
||||
}
|
||||
}
|
||||
if assignedCPUs.Size() == requested {
|
||||
klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod))
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.CPUSet{}, requested),
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of available CPUs.
|
||||
available := p.GetAvailableCPUs(s)
|
||||
|
||||
// Get a list of reusable CPUs (e.g. CPUs reused from initContainers).
|
||||
// It should be an empty CPUSet for a newly created pod.
|
||||
reusable := p.cpusToReuse[string(pod.UID)]
|
||||
|
||||
// Ensure any CPUs already assigned to containers in this pod are included as part of the hint generation.
|
||||
reusable = reusable.Union(assignedCPUs)
|
||||
|
||||
// Generate hints.
|
||||
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
|
||||
klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "cpuHints", cpuHints)
|
||||
|
||||
return map[string][]topologymanager.TopologyHint{
|
||||
string(v1.ResourceCPU): cpuHints,
|
||||
}
|
||||
}
|
||||
|
||||
// generateCPUTopologyHints generates a set of TopologyHints given the set of
|
||||
// available CPUs and the number of CPUs being requested.
|
||||
//
|
||||
// It follows the convention of marking all hints that have the same number of
|
||||
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
|
||||
// marking all others with 'Preferred: false'.
|
||||
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
|
||||
// Initialize minAffinitySize to include all NUMA Nodes.
|
||||
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
|
||||
|
||||
// Iterate through all combinations of numa nodes bitmask and build hints from them.
|
||||
hints := []topologymanager.TopologyHint{}
|
||||
bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().List(), func(mask bitmask.BitMask) {
|
||||
// First, update minAffinitySize for the current request size.
|
||||
cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size()
|
||||
if cpusInMask >= request && mask.Count() < minAffinitySize {
|
||||
minAffinitySize = mask.Count()
|
||||
}
|
||||
|
||||
// Then check to see if we have enough CPUs available on the current
|
||||
// numa node bitmask to satisfy the CPU request.
|
||||
numMatching := 0
|
||||
for _, c := range reusableCPUs.List() {
|
||||
// Disregard this mask if the reusable CPU's NUMA node isn't part of it.
|
||||
if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
|
||||
return
|
||||
}
|
||||
numMatching++
|
||||
}
|
||||
|
||||
// Finally, check to see if enough available CPUs remain on the current
|
||||
// NUMA node combination to satisfy the CPU request.
|
||||
for _, c := range availableCPUs.List() {
|
||||
if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
|
||||
numMatching++
|
||||
}
|
||||
}
|
||||
|
||||
// If they don't, then move onto the next combination.
|
||||
if numMatching < request {
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, create a new hint from the numa node bitmask and add it to the
|
||||
// list of hints. We set all hint preferences to 'false' on the first
|
||||
// pass through.
|
||||
hints = append(hints, topologymanager.TopologyHint{
|
||||
NUMANodeAffinity: mask,
|
||||
Preferred: false,
|
||||
})
|
||||
})
|
||||
|
||||
// Loop back through all hints and update the 'Preferred' field based on
|
||||
// counting the number of bits sets in the affinity mask and comparing it
|
||||
// to the minAffinitySize. Only those with an equal number of bits set (and
|
||||
// with a minimal set of numa nodes) will be considered preferred.
|
||||
for i := range hints {
|
||||
if p.options.AlignBySocket && p.isHintSocketAligned(hints[i], minAffinitySize) {
|
||||
hints[i].Preferred = true
|
||||
continue
|
||||
}
|
||||
if hints[i].NUMANodeAffinity.Count() == minAffinitySize {
|
||||
hints[i].Preferred = true
|
||||
}
|
||||
}
|
||||
|
||||
return hints
|
||||
}
|
||||
|
||||
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
|
||||
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
|
||||
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
|
||||
numaNodesPerSocket := p.topology.NumNUMANodes / p.topology.NumSockets
|
||||
if numaNodesPerSocket == 0 {
|
||||
return false
|
||||
}
|
||||
// minSockets refers to the minimum number of sockets required to satisfy the allocation.
|
||||
// A hint is considered socket-aligned if the number of sockets its NUMA nodes span equals minSockets.
|
||||
minSockets := (minAffinitySize + numaNodesPerSocket - 1) / numaNodesPerSocket
|
||||
return p.topology.CPUDetails.SocketsInNUMANodes(numaNodesBitMask...).Size() == minSockets
|
||||
}
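Quick numeric illustration (not part of the vendored file) of the minSockets ceiling division above: with 2 NUMA nodes per socket, an affinity spanning 3 NUMA nodes needs at least 2 sockets.

package main

import "fmt"

func main() {
	numaNodesPerSocket := 2 // hypothetical topology
	minAffinitySize := 3    // NUMA nodes spanned by the hint
	minSockets := (minAffinitySize + numaNodesPerSocket - 1) / numaNodesPerSocket
	fmt.Println("minimum sockets:", minSockets) // 2
}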
|
||||
|
||||
// getAlignedCPUs return set of aligned CPUs based on numa affinity mask and configured policy options.
|
||||
func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableCPUs cpuset.CPUSet) cpuset.CPUSet {
|
||||
alignedCPUs := cpuset.New()
|
||||
numaBits := numaAffinity.GetBits()
|
||||
|
||||
// If align-by-socket policy option is enabled, NUMA based hint is expanded to
|
||||
// socket aligned hint. It will ensure that first socket aligned available CPUs are
|
||||
// allocated before we try to find CPUs across socket to satisfy allocation request.
|
||||
if p.options.AlignBySocket {
|
||||
socketBits := p.topology.CPUDetails.SocketsInNUMANodes(numaBits...).UnsortedList()
|
||||
for _, socketID := range socketBits {
|
||||
alignedCPUs = alignedCPUs.Union(allocatableCPUs.Intersection(p.topology.CPUDetails.CPUsInSockets(socketID)))
|
||||
}
|
||||
return alignedCPUs
|
||||
}
|
||||
|
||||
for _, numaNodeID := range numaBits {
|
||||
alignedCPUs = alignedCPUs.Union(allocatableCPUs.Intersection(p.topology.CPUDetails.CPUsInNUMANodes(numaNodeID)))
|
||||
}
|
||||
|
||||
return alignedCPUs
|
||||
}
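// A standalone sketch, not part of the vendored file above, of the union-of-intersections
// pattern that getAlignedCPUs uses: for each NUMA node (or socket) in the affinity mask,
// intersect the allocatable CPUs with that node's CPUs and union the results. The CPU
// layout below is invented for illustration.
package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	allocatable := cpuset.New(0, 1, 2, 3, 4, 5, 6, 7)
	cpusPerNUMANode := map[int]cpuset.CPUSet{
		0: cpuset.New(0, 1, 2, 3),
		1: cpuset.New(4, 5, 6, 7),
	}

	affinityNodes := []int{0} // hypothetical NUMA affinity containing node 0 only

	aligned := cpuset.New()
	for _, nodeID := range affinityNodes {
		aligned = aligned.Union(allocatable.Intersection(cpusPerNUMANode[nodeID]))
	}
	fmt.Println(aligned.String()) // prints "0-3"
}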
|
||||
|
||||
func (p *staticPolicy) initializeMetrics(s state.State) {
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
|
||||
}
|
||||
|
||||
func (p *staticPolicy) updateMetricsOnAllocate(cset cpuset.CPUSet) {
|
||||
ncpus := cset.Size()
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
|
||||
}
|
||||
|
||||
func (p *staticPolicy) updateMetricsOnRelease(cset cpuset.CPUSet) {
|
||||
ncpus := cset.Size()
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
|
||||
}
|
||||
|
||||
func countExclusiveCPUs(s state.State) int {
|
||||
exclusiveCPUs := 0
|
||||
for _, cpuAssign := range s.GetCPUAssignments() {
|
||||
for _, cset := range cpuAssign {
|
||||
exclusiveCPUs += cset.Size()
|
||||
}
|
||||
}
|
||||
return exclusiveCPUs
|
||||
}
|
135
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/checkpoint.go
generated
vendored
Normal file
135
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,135 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"strings"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/dump"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
)
|
||||
|
||||
var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV1{}
|
||||
var _ checkpointmanager.Checkpoint = &CPUManagerCheckpointV2{}
|
||||
var _ checkpointmanager.Checkpoint = &CPUManagerCheckpoint{}
|
||||
|
||||
// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint in v2 format
|
||||
type CPUManagerCheckpoint struct {
|
||||
PolicyName string `json:"policyName"`
|
||||
DefaultCPUSet string `json:"defaultCpuSet"`
|
||||
Entries map[string]map[string]string `json:"entries,omitempty"`
|
||||
Checksum checksum.Checksum `json:"checksum"`
|
||||
}
|
||||
|
||||
// CPUManagerCheckpointV1 struct is used to store cpu/pod assignments in a checkpoint in v1 format
|
||||
type CPUManagerCheckpointV1 struct {
|
||||
PolicyName string `json:"policyName"`
|
||||
DefaultCPUSet string `json:"defaultCpuSet"`
|
||||
Entries map[string]string `json:"entries,omitempty"`
|
||||
Checksum checksum.Checksum `json:"checksum"`
|
||||
}
|
||||
|
||||
// CPUManagerCheckpointV2 struct is used to store cpu/pod assignments in a checkpoint in v2 format
|
||||
type CPUManagerCheckpointV2 = CPUManagerCheckpoint
|
||||
|
||||
// NewCPUManagerCheckpoint returns an instance of Checkpoint
|
||||
func NewCPUManagerCheckpoint() *CPUManagerCheckpoint {
|
||||
//nolint:staticcheck // unexported-type-in-api user-facing error message
|
||||
return newCPUManagerCheckpointV2()
|
||||
}
|
||||
|
||||
func newCPUManagerCheckpointV1() *CPUManagerCheckpointV1 {
|
||||
return &CPUManagerCheckpointV1{
|
||||
Entries: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func newCPUManagerCheckpointV2() *CPUManagerCheckpointV2 {
|
||||
return &CPUManagerCheckpointV2{
|
||||
Entries: make(map[string]map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
// MarshalCheckpoint returns marshalled checkpoint in v1 format
|
||||
func (cp *CPUManagerCheckpointV1) MarshalCheckpoint() ([]byte, error) {
|
||||
// make sure checksum wasn't set before so it doesn't affect output checksum
|
||||
cp.Checksum = 0
|
||||
cp.Checksum = checksum.New(cp)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
// MarshalCheckpoint returns marshalled checkpoint in v2 format
|
||||
func (cp *CPUManagerCheckpointV2) MarshalCheckpoint() ([]byte, error) {
|
||||
// make sure checksum wasn't set before so it doesn't affect output checksum
|
||||
cp.Checksum = 0
|
||||
cp.Checksum = checksum.New(cp)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v1 format
|
||||
func (cp *CPUManagerCheckpointV1) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint in v2 format
|
||||
func (cp *CPUManagerCheckpointV2) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that current checksum of checkpoint is valid in v1 format
|
||||
func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
|
||||
if cp.Checksum == 0 {
|
||||
// accept empty checksum for compatibility with old file backend
|
||||
return nil
|
||||
}
|
||||
|
||||
ck := cp.Checksum
|
||||
cp.Checksum = 0
|
||||
object := dump.ForHash(cp)
|
||||
object = strings.Replace(object, "CPUManagerCheckpointV1", "CPUManagerCheckpoint", 1)
|
||||
cp.Checksum = ck
|
||||
|
||||
hash := fnv.New32a()
|
||||
fmt.Fprintf(hash, "%v", object)
|
||||
actualCS := checksum.Checksum(hash.Sum32())
|
||||
if cp.Checksum != actualCS {
|
||||
return &errors.CorruptCheckpointError{
|
||||
ActualCS: uint64(actualCS),
|
||||
ExpectedCS: uint64(cp.Checksum),
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format
|
||||
func (cp *CPUManagerCheckpointV2) VerifyChecksum() error {
|
||||
if cp.Checksum == 0 {
|
||||
// accept empty checksum for compatibility with old file backend
|
||||
return nil
|
||||
}
|
||||
ck := cp.Checksum
|
||||
cp.Checksum = 0
|
||||
err := ck.Verify(cp)
|
||||
cp.Checksum = ck
|
||||
return err
|
||||
}
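// A standalone sketch, not part of the vendored file above, showing the v2 checkpoint
// round trip: fill in a checkpoint, marshal it (which stamps the checksum), then
// unmarshal and verify. It assumes the vendored state package is importable from this
// module; the pod UID, container name and cpuset strings are invented.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
)

func main() {
	cp := state.NewCPUManagerCheckpoint()
	cp.PolicyName = "static"
	cp.DefaultCPUSet = "0-1"
	cp.Entries["pod-uid-1234"] = map[string]string{"app": "2-3"}

	blob, err := cp.MarshalCheckpoint() // computes and embeds the checksum
	if err != nil {
		panic(err)
	}

	restored := state.NewCPUManagerCheckpoint()
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		panic(err)
	}
	if err := restored.VerifyChecksum(); err != nil {
		panic(err)
	}
	fmt.Println("restored default CPU set:", restored.DefaultCPUSet)
}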
|
58
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state.go
generated
vendored
Normal file
58
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state.go
generated
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
// ContainerCPUAssignments type used in cpu manager state
|
||||
type ContainerCPUAssignments map[string]map[string]cpuset.CPUSet
|
||||
|
||||
// Clone returns a copy of ContainerCPUAssignments
|
||||
func (as ContainerCPUAssignments) Clone() ContainerCPUAssignments {
|
||||
ret := make(ContainerCPUAssignments, len(as))
|
||||
for pod := range as {
|
||||
ret[pod] = make(map[string]cpuset.CPUSet, len(as[pod]))
|
||||
for container, cset := range as[pod] {
|
||||
ret[pod][container] = cset
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Reader interface used to read current cpu/pod assignment state
|
||||
type Reader interface {
|
||||
GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool)
|
||||
GetDefaultCPUSet() cpuset.CPUSet
|
||||
GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet
|
||||
GetCPUAssignments() ContainerCPUAssignments
|
||||
}
|
||||
|
||||
type writer interface {
|
||||
SetCPUSet(podUID string, containerName string, cpuset cpuset.CPUSet)
|
||||
SetDefaultCPUSet(cpuset cpuset.CPUSet)
|
||||
SetCPUAssignments(ContainerCPUAssignments)
|
||||
Delete(podUID string, containerName string)
|
||||
ClearState()
|
||||
}
|
||||
|
||||
// State interface provides methods for tracking and setting cpu/pod assignment
|
||||
type State interface {
|
||||
Reader
|
||||
writer
|
||||
}
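// A standalone sketch, not part of the vendored file above, of a read-only consumer of
// the Reader interface. The package and function names are hypothetical; the logic
// mirrors GetCPUSetOrDefault to show how callers fall back to the shared default pool
// when a container has no exclusive assignment.
package cpuassign

import (
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
	"k8s.io/utils/cpuset"
)

// CPUsFor returns the exclusive CPU set assigned to a container, or the shared
// default set when no exclusive assignment exists.
func CPUsFor(r state.Reader, podUID, containerName string) cpuset.CPUSet {
	if cset, ok := r.GetCPUSet(podUID, containerName); ok {
		return cset
	}
	return r.GetDefaultCPUSet()
}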
|
250
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go
generated
vendored
Normal file
250
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,250 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
var _ State = &stateCheckpoint{}
|
||||
|
||||
type stateCheckpoint struct {
|
||||
mux sync.RWMutex
|
||||
policyName string
|
||||
cache State
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
checkpointName string
|
||||
initialContainers containermap.ContainerMap
|
||||
}
|
||||
|
||||
// NewCheckpointState creates new State for keeping track of cpu/pod assignment with checkpoint backend
|
||||
func NewCheckpointState(stateDir, checkpointName, policyName string, initialContainers containermap.ContainerMap) (State, error) {
|
||||
checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
|
||||
}
|
||||
stateCheckpoint := &stateCheckpoint{
|
||||
cache: NewMemoryState(),
|
||||
policyName: policyName,
|
||||
checkpointManager: checkpointManager,
|
||||
checkpointName: checkpointName,
|
||||
initialContainers: initialContainers,
|
||||
}
|
||||
|
||||
if err := stateCheckpoint.restoreState(); err != nil {
|
||||
//nolint:staticcheck // ST1005 user-facing error message
|
||||
return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete the CPU manager checkpoint file %q before restarting Kubelet",
|
||||
err, filepath.Join(stateDir, checkpointName))
|
||||
}
|
||||
|
||||
return stateCheckpoint, nil
|
||||
}
|
||||
|
||||
// migrateV1CheckpointToV2Checkpoint() converts checkpoints from the v1 format to the v2 format
|
||||
func (sc *stateCheckpoint) migrateV1CheckpointToV2Checkpoint(src *CPUManagerCheckpointV1, dst *CPUManagerCheckpointV2) error {
|
||||
if src.PolicyName != "" {
|
||||
dst.PolicyName = src.PolicyName
|
||||
}
|
||||
if src.DefaultCPUSet != "" {
|
||||
dst.DefaultCPUSet = src.DefaultCPUSet
|
||||
}
|
||||
for containerID, cset := range src.Entries {
|
||||
podUID, containerName, err := sc.initialContainers.GetContainerRef(containerID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("containerID '%v' not found in initial containers list", containerID)
|
||||
}
|
||||
if dst.Entries == nil {
|
||||
dst.Entries = make(map[string]map[string]string)
|
||||
}
|
||||
if _, exists := dst.Entries[podUID]; !exists {
|
||||
dst.Entries[podUID] = make(map[string]string)
|
||||
}
|
||||
dst.Entries[podUID][containerName] = cset
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// restores state from a checkpoint and creates it if it doesn't exist
|
||||
func (sc *stateCheckpoint) restoreState() error {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
var err error
|
||||
|
||||
checkpointV1 := newCPUManagerCheckpointV1()
|
||||
checkpointV2 := newCPUManagerCheckpointV2()
|
||||
|
||||
if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV1); err != nil {
|
||||
checkpointV1 = &CPUManagerCheckpointV1{} // reset it back to 0
|
||||
if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpointV2); err != nil {
|
||||
if err == errors.ErrCheckpointNotFound {
|
||||
return sc.storeState()
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err = sc.migrateV1CheckpointToV2Checkpoint(checkpointV1, checkpointV2); err != nil {
|
||||
return fmt.Errorf("error migrating v1 checkpoint state to v2 checkpoint state: %s", err)
|
||||
}
|
||||
|
||||
if sc.policyName != checkpointV2.PolicyName {
|
||||
return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpointV2.PolicyName)
|
||||
}
|
||||
|
||||
var tmpDefaultCPUSet cpuset.CPUSet
|
||||
if tmpDefaultCPUSet, err = cpuset.Parse(checkpointV2.DefaultCPUSet); err != nil {
|
||||
return fmt.Errorf("could not parse default cpu set %q: %v", checkpointV2.DefaultCPUSet, err)
|
||||
}
|
||||
|
||||
var tmpContainerCPUSet cpuset.CPUSet
|
||||
tmpAssignments := ContainerCPUAssignments{}
|
||||
for pod := range checkpointV2.Entries {
|
||||
tmpAssignments[pod] = make(map[string]cpuset.CPUSet, len(checkpointV2.Entries[pod]))
|
||||
for container, cpuString := range checkpointV2.Entries[pod] {
|
||||
if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil {
|
||||
return fmt.Errorf("could not parse cpuset %q for container %q in pod %q: %v", cpuString, container, pod, err)
|
||||
}
|
||||
tmpAssignments[pod][container] = tmpContainerCPUSet
|
||||
}
|
||||
}
|
||||
|
||||
sc.cache.SetDefaultCPUSet(tmpDefaultCPUSet)
|
||||
sc.cache.SetCPUAssignments(tmpAssignments)
|
||||
|
||||
klog.V(2).InfoS("State checkpoint: restored state from checkpoint")
|
||||
klog.V(2).InfoS("State checkpoint: defaultCPUSet", "defaultCpuSet", tmpDefaultCPUSet.String())
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// saves state to a checkpoint, caller is responsible for locking
|
||||
func (sc *stateCheckpoint) storeState() error {
|
||||
checkpoint := NewCPUManagerCheckpoint()
|
||||
checkpoint.PolicyName = sc.policyName
|
||||
checkpoint.DefaultCPUSet = sc.cache.GetDefaultCPUSet().String()
|
||||
|
||||
assignments := sc.cache.GetCPUAssignments()
|
||||
for pod := range assignments {
|
||||
checkpoint.Entries[pod] = make(map[string]string, len(assignments[pod]))
|
||||
for container, cset := range assignments[pod] {
|
||||
checkpoint.Entries[pod][container] = cset.String()
|
||||
}
|
||||
}
|
||||
|
||||
err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to save checkpoint")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetCPUSet returns current CPU set
|
||||
func (sc *stateCheckpoint) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) {
|
||||
sc.mux.RLock()
|
||||
defer sc.mux.RUnlock()
|
||||
|
||||
res, ok := sc.cache.GetCPUSet(podUID, containerName)
|
||||
return res, ok
|
||||
}
|
||||
|
||||
// GetDefaultCPUSet returns default CPU set
|
||||
func (sc *stateCheckpoint) GetDefaultCPUSet() cpuset.CPUSet {
|
||||
sc.mux.RLock()
|
||||
defer sc.mux.RUnlock()
|
||||
|
||||
return sc.cache.GetDefaultCPUSet()
|
||||
}
|
||||
|
||||
// GetCPUSetOrDefault returns current CPU set, or default one if it wasn't changed
|
||||
func (sc *stateCheckpoint) GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet {
|
||||
sc.mux.RLock()
|
||||
defer sc.mux.RUnlock()
|
||||
|
||||
return sc.cache.GetCPUSetOrDefault(podUID, containerName)
|
||||
}
|
||||
|
||||
// GetCPUAssignments returns current CPU to pod assignments
|
||||
func (sc *stateCheckpoint) GetCPUAssignments() ContainerCPUAssignments {
|
||||
sc.mux.RLock()
|
||||
defer sc.mux.RUnlock()
|
||||
|
||||
return sc.cache.GetCPUAssignments()
|
||||
}
|
||||
|
||||
// SetCPUSet sets CPU set
|
||||
func (sc *stateCheckpoint) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
sc.cache.SetCPUSet(podUID, containerName, cset)
|
||||
err := sc.storeState()
|
||||
if err != nil {
|
||||
klog.InfoS("Store state to checkpoint error", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// SetDefaultCPUSet sets default CPU set
|
||||
func (sc *stateCheckpoint) SetDefaultCPUSet(cset cpuset.CPUSet) {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
sc.cache.SetDefaultCPUSet(cset)
|
||||
err := sc.storeState()
|
||||
if err != nil {
|
||||
klog.InfoS("Store state to checkpoint error", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// SetCPUAssignments sets CPU to pod assignments
|
||||
func (sc *stateCheckpoint) SetCPUAssignments(a ContainerCPUAssignments) {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
sc.cache.SetCPUAssignments(a)
|
||||
err := sc.storeState()
|
||||
if err != nil {
|
||||
klog.InfoS("Store state to checkpoint error", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete deletes assignment for specified pod
|
||||
func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
sc.cache.Delete(podUID, containerName)
|
||||
err := sc.storeState()
|
||||
if err != nil {
|
||||
klog.InfoS("Store state to checkpoint error", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ClearState clears the state and saves it in a checkpoint
|
||||
func (sc *stateCheckpoint) ClearState() {
|
||||
sc.mux.Lock()
|
||||
defer sc.mux.Unlock()
|
||||
sc.cache.ClearState()
|
||||
err := sc.storeState()
|
||||
if err != nil {
|
||||
klog.InfoS("Store state to checkpoint error", "err", err)
|
||||
}
|
||||
}
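// A standalone sketch, not part of the vendored file above, showing how the
// checkpoint-backed store is constructed and read. The directory, checkpoint file name
// and policy name are hypothetical, and containermap.NewContainerMap() is assumed to be
// the usual way to build an empty initial-containers map.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
)

func main() {
	st, err := state.NewCheckpointState(
		"/var/lib/kubelet",  // hypothetical state directory
		"cpu_manager_state", // hypothetical checkpoint file name
		"static",            // must match the configured CPU manager policy
		containermap.NewContainerMap(),
	)
	if err != nil {
		fmt.Println("could not restore CPU manager state:", err)
		return
	}
	fmt.Println("default CPU set:", st.GetDefaultCPUSet().String())
}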
|
117
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state_mem.go
generated
vendored
Normal file
117
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/state_mem.go
generated
vendored
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
type stateMemory struct {
|
||||
sync.RWMutex
|
||||
assignments ContainerCPUAssignments
|
||||
defaultCPUSet cpuset.CPUSet
|
||||
}
|
||||
|
||||
var _ State = &stateMemory{}
|
||||
|
||||
// NewMemoryState creates new State for keeping track of cpu/pod assignment
|
||||
func NewMemoryState() State {
|
||||
klog.InfoS("Initialized new in-memory state store")
|
||||
return &stateMemory{
|
||||
assignments: ContainerCPUAssignments{},
|
||||
defaultCPUSet: cpuset.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *stateMemory) GetCPUSet(podUID string, containerName string) (cpuset.CPUSet, bool) {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
res, ok := s.assignments[podUID][containerName]
|
||||
return res.Clone(), ok
|
||||
}
|
||||
|
||||
func (s *stateMemory) GetDefaultCPUSet() cpuset.CPUSet {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
return s.defaultCPUSet.Clone()
|
||||
}
|
||||
|
||||
func (s *stateMemory) GetCPUSetOrDefault(podUID string, containerName string) cpuset.CPUSet {
|
||||
if res, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
return res
|
||||
}
|
||||
return s.GetDefaultCPUSet()
|
||||
}
|
||||
|
||||
func (s *stateMemory) GetCPUAssignments() ContainerCPUAssignments {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
return s.assignments.Clone()
|
||||
}
|
||||
|
||||
func (s *stateMemory) SetCPUSet(podUID string, containerName string, cset cpuset.CPUSet) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
if _, ok := s.assignments[podUID]; !ok {
|
||||
s.assignments[podUID] = make(map[string]cpuset.CPUSet)
|
||||
}
|
||||
|
||||
s.assignments[podUID][containerName] = cset
|
||||
klog.InfoS("Updated desired CPUSet", "podUID", podUID, "containerName", containerName, "cpuSet", cset)
|
||||
}
|
||||
|
||||
func (s *stateMemory) SetDefaultCPUSet(cset cpuset.CPUSet) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
s.defaultCPUSet = cset
|
||||
klog.InfoS("Updated default CPUSet", "cpuSet", cset)
|
||||
}
|
||||
|
||||
func (s *stateMemory) SetCPUAssignments(a ContainerCPUAssignments) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
s.assignments = a.Clone()
|
||||
klog.InfoS("Updated CPUSet assignments", "assignments", a)
|
||||
}
|
||||
|
||||
func (s *stateMemory) Delete(podUID string, containerName string) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
delete(s.assignments[podUID], containerName)
|
||||
if len(s.assignments[podUID]) == 0 {
|
||||
delete(s.assignments, podUID)
|
||||
}
|
||||
klog.V(2).InfoS("Deleted CPUSet assignment", "podUID", podUID, "containerName", containerName)
|
||||
}
|
||||
|
||||
func (s *stateMemory) ClearState() {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
s.defaultCPUSet = cpuset.CPUSet{}
|
||||
s.assignments = make(ContainerCPUAssignments)
|
||||
klog.V(2).InfoS("Cleared state")
|
||||
}
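// A standalone sketch, not part of the vendored file above, showing why the in-memory
// store hands out defensive copies: GetCPUAssignments returns a Clone, so mutating the
// returned map does not change the store. The pod UID and container name are invented.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
	"k8s.io/utils/cpuset"
)

func main() {
	s := state.NewMemoryState()
	s.SetCPUSet("pod-uid-1234", "app", cpuset.New(2, 3))

	snapshot := s.GetCPUAssignments()
	delete(snapshot, "pod-uid-1234") // only the copy is modified

	if cset, ok := s.GetCPUSet("pod-uid-1234", "app"); ok {
		fmt.Println("store still holds the assignment:", cset.String())
	}
}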
|
18
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology/doc.go
generated
vendored
Normal file
18
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology/doc.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package topology contains helpers for the CPU manager.
|
||||
package topology // import "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
389
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology/topology.go
generated
vendored
Normal file
389
vendor/k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology/topology.go
generated
vendored
Normal file
@ -0,0 +1,389 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package topology
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/cpuset"
|
||||
)
|
||||
|
||||
// NUMANodeInfo is a map from NUMANode ID to a list of CPU IDs associated with
|
||||
// that NUMANode.
|
||||
type NUMANodeInfo map[int]cpuset.CPUSet
|
||||
|
||||
// CPUDetails is a map from CPU ID to Core ID, Socket ID, and NUMA ID.
|
||||
type CPUDetails map[int]CPUInfo
|
||||
|
||||
// CPUTopology contains details of the node's CPU topology, where:
|
||||
// CPU - logical CPU, cadvisor - thread
|
||||
// Core - physical CPU, cadvisor - Core
|
||||
// Socket - socket, cadvisor - Socket
|
||||
// NUMA Node - NUMA cell, cadvisor - Node
|
||||
// UncoreCache - Split L3 Cache Topology, cadvisor
|
||||
type CPUTopology struct {
|
||||
NumCPUs int
|
||||
NumCores int
|
||||
NumUncoreCache int
|
||||
NumSockets int
|
||||
NumNUMANodes int
|
||||
CPUDetails CPUDetails
|
||||
}
|
||||
|
||||
// CPUsPerCore returns the number of logical CPUs associated with
|
||||
// each core.
|
||||
func (topo *CPUTopology) CPUsPerCore() int {
|
||||
if topo.NumCores == 0 {
|
||||
return 0
|
||||
}
|
||||
return topo.NumCPUs / topo.NumCores
|
||||
}
|
||||
|
||||
// CPUsPerSocket returns the number of logical CPUs associated with
|
||||
// each socket.
|
||||
func (topo *CPUTopology) CPUsPerSocket() int {
|
||||
if topo.NumSockets == 0 {
|
||||
return 0
|
||||
}
|
||||
return topo.NumCPUs / topo.NumSockets
|
||||
}
|
||||
|
||||
// CPUsPerUncore returns the number of logical CPUs associated with
|
||||
// each UncoreCache
|
||||
func (topo *CPUTopology) CPUsPerUncore() int {
|
||||
if topo.NumUncoreCache == 0 {
|
||||
return 0
|
||||
}
|
||||
return topo.NumCPUs / topo.NumUncoreCache
|
||||
}
|
||||
|
||||
// CPUCoreID returns the physical core ID which the given logical CPU
|
||||
// belongs to.
|
||||
func (topo *CPUTopology) CPUCoreID(cpu int) (int, error) {
|
||||
info, ok := topo.CPUDetails[cpu]
|
||||
if !ok {
|
||||
return -1, fmt.Errorf("unknown CPU ID: %d", cpu)
|
||||
}
|
||||
return info.CoreID, nil
|
||||
}
|
||||
|
||||
// CPUSocketID returns the socket ID which the given logical CPU belongs to.
|
||||
func (topo *CPUTopology) CPUSocketID(cpu int) (int, error) {
|
||||
info, ok := topo.CPUDetails[cpu]
|
||||
if !ok {
|
||||
return -1, fmt.Errorf("unknown CPU ID: %d", cpu)
|
||||
}
|
||||
return info.SocketID, nil
|
||||
}
|
||||
|
||||
// CPUNUMANodeID returns the NUMA node ID which the given logical CPU belongs to.
|
||||
func (topo *CPUTopology) CPUNUMANodeID(cpu int) (int, error) {
|
||||
info, ok := topo.CPUDetails[cpu]
|
||||
if !ok {
|
||||
return -1, fmt.Errorf("unknown CPU ID: %d", cpu)
|
||||
}
|
||||
return info.NUMANodeID, nil
|
||||
}
|
||||
|
||||
// CPUInfo contains the NUMA, socket, UncoreCache and core IDs associated with a CPU.
|
||||
type CPUInfo struct {
|
||||
NUMANodeID int
|
||||
SocketID int
|
||||
CoreID int
|
||||
UncoreCacheID int
|
||||
}
|
||||
|
||||
// KeepOnly returns a new CPUDetails object with only the supplied cpus.
|
||||
func (d CPUDetails) KeepOnly(cpus cpuset.CPUSet) CPUDetails {
|
||||
result := CPUDetails{}
|
||||
for cpu, info := range d {
|
||||
if cpus.Contains(cpu) {
|
||||
result[cpu] = info
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// UncoreCaches returns all of the uncore cache IDs (L3 indexes) associated with the CPUs in this CPUDetails.
|
||||
func (d CPUDetails) UncoreCaches() cpuset.CPUSet {
|
||||
var numUnCoreIDs []int
|
||||
for _, info := range d {
|
||||
numUnCoreIDs = append(numUnCoreIDs, info.UncoreCacheID)
|
||||
}
|
||||
return cpuset.New(numUnCoreIDs...)
|
||||
}
|
||||
|
||||
// UncoreInNUMANodes returns all of the uncore cache IDs associated with the given
|
||||
// NUMANode IDs in this CPUDetails.
|
||||
func (d CPUDetails) UncoreInNUMANodes(ids ...int) cpuset.CPUSet {
|
||||
var unCoreIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.NUMANodeID == id {
|
||||
unCoreIDs = append(unCoreIDs, info.UncoreCacheID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(unCoreIDs...)
|
||||
}
|
||||
|
||||
// CoresNeededInUncoreCache returns either the full list of all available unique core IDs associated with the given
|
||||
// UncoreCache IDs in this CPUDetails, or a subset that matches the requested number of cores.
|
||||
func (d CPUDetails) CoresNeededInUncoreCache(numCoresNeeded int, ids ...int) cpuset.CPUSet {
|
||||
coreIDs := d.coresInUncoreCache(ids...)
|
||||
if coreIDs.Size() <= numCoresNeeded {
|
||||
return coreIDs
|
||||
}
|
||||
tmpCoreIDs := coreIDs.List()
|
||||
return cpuset.New(tmpCoreIDs[:numCoresNeeded]...)
|
||||
}
|
||||
|
||||
// coresInUncoreCache is a helper that returns the core IDs for the given uncore cache IDs.
|
||||
func (d CPUDetails) coresInUncoreCache(ids ...int) cpuset.CPUSet {
|
||||
var coreIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.UncoreCacheID == id {
|
||||
coreIDs = append(coreIDs, info.CoreID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(coreIDs...)
|
||||
}
|
||||
|
||||
// CPUsInUncoreCaches returns all the logical CPU IDs associated with the given
|
||||
// UnCoreCache IDs in this CPUDetails
|
||||
func (d CPUDetails) CPUsInUncoreCaches(ids ...int) cpuset.CPUSet {
|
||||
var cpuIDs []int
|
||||
for _, id := range ids {
|
||||
for cpu, info := range d {
|
||||
if info.UncoreCacheID == id {
|
||||
cpuIDs = append(cpuIDs, cpu)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(cpuIDs...)
|
||||
}
|
||||
|
||||
// NUMANodes returns all of the NUMANode IDs associated with the CPUs in this
|
||||
// CPUDetails.
|
||||
func (d CPUDetails) NUMANodes() cpuset.CPUSet {
|
||||
var numaNodeIDs []int
|
||||
for _, info := range d {
|
||||
numaNodeIDs = append(numaNodeIDs, info.NUMANodeID)
|
||||
}
|
||||
return cpuset.New(numaNodeIDs...)
|
||||
}
|
||||
|
||||
// NUMANodesInSockets returns all of the logical NUMANode IDs associated with
|
||||
// the given socket IDs in this CPUDetails.
|
||||
func (d CPUDetails) NUMANodesInSockets(ids ...int) cpuset.CPUSet {
|
||||
var numaNodeIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.SocketID == id {
|
||||
numaNodeIDs = append(numaNodeIDs, info.NUMANodeID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(numaNodeIDs...)
|
||||
}
|
||||
|
||||
// Sockets returns all of the socket IDs associated with the CPUs in this
|
||||
// CPUDetails.
|
||||
func (d CPUDetails) Sockets() cpuset.CPUSet {
|
||||
var socketIDs []int
|
||||
for _, info := range d {
|
||||
socketIDs = append(socketIDs, info.SocketID)
|
||||
}
|
||||
return cpuset.New(socketIDs...)
|
||||
}
|
||||
|
||||
// CPUsInSockets returns all of the logical CPU IDs associated with the given
|
||||
// socket IDs in this CPUDetails.
|
||||
func (d CPUDetails) CPUsInSockets(ids ...int) cpuset.CPUSet {
|
||||
var cpuIDs []int
|
||||
for _, id := range ids {
|
||||
for cpu, info := range d {
|
||||
if info.SocketID == id {
|
||||
cpuIDs = append(cpuIDs, cpu)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(cpuIDs...)
|
||||
}
|
||||
|
||||
// SocketsInNUMANodes returns all of the logical Socket IDs associated with the
|
||||
// given NUMANode IDs in this CPUDetails.
|
||||
func (d CPUDetails) SocketsInNUMANodes(ids ...int) cpuset.CPUSet {
|
||||
var socketIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.NUMANodeID == id {
|
||||
socketIDs = append(socketIDs, info.SocketID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(socketIDs...)
|
||||
}
|
||||
|
||||
// Cores returns all of the core IDs associated with the CPUs in this
|
||||
// CPUDetails.
|
||||
func (d CPUDetails) Cores() cpuset.CPUSet {
|
||||
var coreIDs []int
|
||||
for _, info := range d {
|
||||
coreIDs = append(coreIDs, info.CoreID)
|
||||
}
|
||||
return cpuset.New(coreIDs...)
|
||||
}
|
||||
|
||||
// CoresInNUMANodes returns all of the core IDs associated with the given
|
||||
// NUMANode IDs in this CPUDetails.
|
||||
func (d CPUDetails) CoresInNUMANodes(ids ...int) cpuset.CPUSet {
|
||||
var coreIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.NUMANodeID == id {
|
||||
coreIDs = append(coreIDs, info.CoreID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(coreIDs...)
|
||||
}
|
||||
|
||||
// CoresInSockets returns all of the core IDs associated with the given socket
|
||||
// IDs in this CPUDetails.
|
||||
func (d CPUDetails) CoresInSockets(ids ...int) cpuset.CPUSet {
|
||||
var coreIDs []int
|
||||
for _, id := range ids {
|
||||
for _, info := range d {
|
||||
if info.SocketID == id {
|
||||
coreIDs = append(coreIDs, info.CoreID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(coreIDs...)
|
||||
}
|
||||
|
||||
// CPUs returns all of the logical CPU IDs in this CPUDetails.
|
||||
func (d CPUDetails) CPUs() cpuset.CPUSet {
|
||||
var cpuIDs []int
|
||||
for cpuID := range d {
|
||||
cpuIDs = append(cpuIDs, cpuID)
|
||||
}
|
||||
return cpuset.New(cpuIDs...)
|
||||
}
|
||||
|
||||
// CPUsInNUMANodes returns all of the logical CPU IDs associated with the given
|
||||
// NUMANode IDs in this CPUDetails.
|
||||
func (d CPUDetails) CPUsInNUMANodes(ids ...int) cpuset.CPUSet {
|
||||
var cpuIDs []int
|
||||
for _, id := range ids {
|
||||
for cpu, info := range d {
|
||||
if info.NUMANodeID == id {
|
||||
cpuIDs = append(cpuIDs, cpu)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(cpuIDs...)
|
||||
}
|
||||
|
||||
// CPUsInCores returns all of the logical CPU IDs associated with the given
|
||||
// core IDs in this CPUDetails.
|
||||
func (d CPUDetails) CPUsInCores(ids ...int) cpuset.CPUSet {
|
||||
var cpuIDs []int
|
||||
for _, id := range ids {
|
||||
for cpu, info := range d {
|
||||
if info.CoreID == id {
|
||||
cpuIDs = append(cpuIDs, cpu)
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuset.New(cpuIDs...)
|
||||
}
|
||||
|
||||
func getUncoreCacheID(core cadvisorapi.Core) int {
|
||||
if len(core.UncoreCaches) < 1 {
|
||||
// If cAdvisor reports no uncore caches, fall back to socket alignment since the uncore cache is not shared
|
||||
return core.SocketID
|
||||
}
|
||||
// Even though the cAdvisor API returns a slice, we only expect either zero or one uncore cache,
|
||||
// so everything past the first entry should be discarded or ignored
|
||||
return core.UncoreCaches[0].Id
|
||||
}
|
||||
|
||||
// Discover returns CPUTopology based on cadvisor node info
|
||||
func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
|
||||
if machineInfo.NumCores == 0 {
|
||||
return nil, fmt.Errorf("could not detect number of cpus")
|
||||
}
|
||||
|
||||
CPUDetails := CPUDetails{}
|
||||
numPhysicalCores := 0
|
||||
|
||||
for _, node := range machineInfo.Topology {
|
||||
numPhysicalCores += len(node.Cores)
|
||||
for _, core := range node.Cores {
|
||||
if coreID, err := getUniqueCoreID(core.Threads); err == nil {
|
||||
for _, cpu := range core.Threads {
|
||||
CPUDetails[cpu] = CPUInfo{
|
||||
CoreID: coreID,
|
||||
SocketID: core.SocketID,
|
||||
NUMANodeID: node.Id,
|
||||
UncoreCacheID: getUncoreCacheID(core),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
klog.ErrorS(nil, "Could not get unique coreID for socket", "socket", core.SocketID, "core", core.Id, "threads", core.Threads)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &CPUTopology{
|
||||
NumCPUs: machineInfo.NumCores,
|
||||
NumSockets: machineInfo.NumSockets,
|
||||
NumCores: numPhysicalCores,
|
||||
NumNUMANodes: CPUDetails.NUMANodes().Size(),
|
||||
NumUncoreCache: CPUDetails.UncoreCaches().Size(),
|
||||
CPUDetails: CPUDetails,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getUniqueCoreID computes coreId as the lowest cpuID
|
||||
// for a given Threads []int slice. This ensures that coreIDs are
|
||||
// unique across the platform (unlike the IDs reported by cAdvisor)
|
||||
func getUniqueCoreID(threads []int) (coreID int, err error) {
|
||||
if len(threads) == 0 {
|
||||
return 0, fmt.Errorf("no cpus provided")
|
||||
}
|
||||
|
||||
if len(threads) != cpuset.New(threads...).Size() {
|
||||
return 0, fmt.Errorf("cpus provided are not unique")
|
||||
}
|
||||
|
||||
min := threads[0]
|
||||
for _, thread := range threads[1:] {
|
||||
if thread < min {
|
||||
min = thread
|
||||
}
|
||||
}
|
||||
|
||||
return min, nil
|
||||
}
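// A standalone sketch, not part of the vendored file above, that builds a tiny
// CPUDetails map by hand (an invented two-socket layout with one core and two
// hyperthreads per socket) and exercises a few of the query helpers defined above.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
)

func main() {
	details := topology.CPUDetails{
		0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
		2: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
		1: {CoreID: 1, SocketID: 1, NUMANodeID: 1, UncoreCacheID: 1},
		3: {CoreID: 1, SocketID: 1, NUMANodeID: 1, UncoreCacheID: 1},
	}

	fmt.Println("all CPUs:          ", details.CPUs().String())                // "0-3"
	fmt.Println("CPUs on NUMA 0:    ", details.CPUsInNUMANodes(0).String())    // "0,2"
	fmt.Println("cores on socket 1: ", details.CoresInSockets(1).String())     // "1"
	fmt.Println("sockets of NUMA 1: ", details.SocketsInNUMANodes(1).String()) // "1"
}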
|
8
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/OWNERS
generated
vendored
Normal file
8
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/OWNERS
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers: []
|
||||
reviewers:
|
||||
- klueska
|
||||
emeritus_approvers:
|
||||
- vishh
|
||||
- jiayingz
|
109
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint/checkpoint.go
generated
vendored
Normal file
109
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint/checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checkpoint
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
)
|
||||
|
||||
// DeviceManagerCheckpoint defines the operations to retrieve pod devices
|
||||
type DeviceManagerCheckpoint interface {
|
||||
checkpointmanager.Checkpoint
|
||||
GetData() ([]PodDevicesEntry, map[string][]string)
|
||||
}
|
||||
|
||||
// DevicesPerNUMA represents device ids obtained from device plugin per NUMA node id
|
||||
type DevicesPerNUMA map[int64][]string
|
||||
|
||||
// PodDevicesEntry connects pod information to devices
|
||||
type PodDevicesEntry struct {
|
||||
PodUID string
|
||||
ContainerName string
|
||||
ResourceName string
|
||||
DeviceIDs DevicesPerNUMA
|
||||
AllocResp []byte
|
||||
}
|
||||
|
||||
// checkpointData struct is used to store pod to device allocation information
|
||||
// in a checkpoint file.
|
||||
// TODO: add version control when we need to change checkpoint format.
|
||||
type checkpointData struct {
|
||||
PodDeviceEntries []PodDevicesEntry
|
||||
RegisteredDevices map[string][]string
|
||||
}
|
||||
|
||||
// Data holds checkpoint data and its checksum
|
||||
type Data struct {
|
||||
Data checkpointData
|
||||
Checksum checksum.Checksum
|
||||
}
|
||||
|
||||
// NewDevicesPerNUMA creates an empty DevicesPerNUMA map
|
||||
func NewDevicesPerNUMA() DevicesPerNUMA {
|
||||
return make(DevicesPerNUMA)
|
||||
}
|
||||
|
||||
// Devices returns all device IDs for all NUMA nodes
|
||||
// and represents them as a sets.Set[string]
|
||||
func (dev DevicesPerNUMA) Devices() sets.Set[string] {
|
||||
result := sets.New[string]()
|
||||
|
||||
for _, devs := range dev {
|
||||
result.Insert(devs...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// New returns an instance of Checkpoint - must be an alias for the most recent version
|
||||
func New(devEntries []PodDevicesEntry, devices map[string][]string) DeviceManagerCheckpoint {
|
||||
return newV2(devEntries, devices)
|
||||
}
|
||||
|
||||
func newV2(devEntries []PodDevicesEntry, devices map[string][]string) DeviceManagerCheckpoint {
|
||||
return &Data{
|
||||
Data: checkpointData{
|
||||
PodDeviceEntries: devEntries,
|
||||
RegisteredDevices: devices,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// MarshalCheckpoint returns marshalled data
|
||||
func (cp *Data) MarshalCheckpoint() ([]byte, error) {
|
||||
cp.Checksum = checksum.New(cp.Data)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
// UnmarshalCheckpoint returns unmarshalled data
|
||||
func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that passed checksum is same as calculated checksum
|
||||
func (cp *Data) VerifyChecksum() error {
|
||||
return cp.Checksum.Verify(cp.Data)
|
||||
}
|
||||
|
||||
// GetData returns device entries and registered devices in the *most recent*
|
||||
// checkpoint format, *not* in the original format stored on disk.
|
||||
func (cp *Data) GetData() ([]PodDevicesEntry, map[string][]string) {
|
||||
return cp.Data.PodDeviceEntries, cp.Data.RegisteredDevices
|
||||
}
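// A standalone sketch, not part of the vendored file above, that builds a device manager
// checkpoint from one hypothetical allocation (two devices of an invented extended
// resource on NUMA node 0) and marshals it the way the checkpoint manager would.
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
)

func main() {
	devs := checkpoint.NewDevicesPerNUMA()
	devs[0] = []string{"dev-0", "dev-1"} // hypothetical device IDs on NUMA node 0

	entries := []checkpoint.PodDevicesEntry{{
		PodUID:        "pod-uid-1234",
		ContainerName: "app",
		ResourceName:  "example.com/gpu", // hypothetical extended resource name
		DeviceIDs:     devs,
	}}
	registered := map[string][]string{"example.com/gpu": {"dev-0", "dev-1", "dev-2"}}

	cp := checkpoint.New(entries, registered)
	blob, err := cp.MarshalCheckpoint()
	if err != nil {
		panic(err)
	}
	fmt.Printf("checkpoint is %d bytes; entry devices: %v\n", len(blob), devs.Devices().UnsortedList())
}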
|
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/endpoint.go
generated
vendored
Normal file
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/endpoint.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package devicemanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
|
||||
)
|
||||
|
||||
// endpoint maps to a single registered device plugin. It is responsible
|
||||
// for managing gRPC communications with the device plugin and caching
|
||||
// device states reported by the device plugin.
|
||||
type endpoint interface {
|
||||
getPreferredAllocation(available, mustInclude []string, size int) (*pluginapi.PreferredAllocationResponse, error)
|
||||
allocate(devs []string) (*pluginapi.AllocateResponse, error)
|
||||
preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error)
|
||||
setStopTime(t time.Time)
|
||||
isStopped() bool
|
||||
stopGracePeriodExpired() bool
|
||||
}
|
||||
|
||||
type endpointImpl struct {
|
||||
mutex sync.Mutex
|
||||
resourceName string
|
||||
api pluginapi.DevicePluginClient
|
||||
stopTime time.Time
|
||||
client plugin.Client // for testing only
|
||||
}
|
||||
|
||||
// newEndpointImpl creates a new endpoint for the given resourceName.
|
||||
// This is to be used during normal device plugin registration.
|
||||
func newEndpointImpl(p plugin.DevicePlugin) *endpointImpl {
|
||||
return &endpointImpl{
|
||||
api: p.API(),
|
||||
resourceName: p.Resource(),
|
||||
}
|
||||
}
|
||||
|
||||
// newStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set.
|
||||
// This is to be used during Kubelet restart, before the actual device plugin re-registers.
|
||||
func newStoppedEndpointImpl(resourceName string) *endpointImpl {
|
||||
return &endpointImpl{
|
||||
resourceName: resourceName,
|
||||
stopTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
func (e *endpointImpl) isStopped() bool {
|
||||
e.mutex.Lock()
|
||||
defer e.mutex.Unlock()
|
||||
return !e.stopTime.IsZero()
|
||||
}
|
||||
|
||||
func (e *endpointImpl) stopGracePeriodExpired() bool {
|
||||
e.mutex.Lock()
|
||||
defer e.mutex.Unlock()
|
||||
return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
|
||||
}
|
||||
|
||||
func (e *endpointImpl) setStopTime(t time.Time) {
|
||||
e.mutex.Lock()
|
||||
defer e.mutex.Unlock()
|
||||
e.stopTime = t
|
||||
}
|
||||
|
||||
// getPreferredAllocation issues GetPreferredAllocation gRPC call to the device plugin.
|
||||
func (e *endpointImpl) getPreferredAllocation(available, mustInclude []string, size int) (*pluginapi.PreferredAllocationResponse, error) {
|
||||
if e.isStopped() {
|
||||
return nil, fmt.Errorf(errEndpointStopped, e)
|
||||
}
|
||||
return e.api.GetPreferredAllocation(context.Background(), &pluginapi.PreferredAllocationRequest{
|
||||
ContainerRequests: []*pluginapi.ContainerPreferredAllocationRequest{
|
||||
{
|
||||
AvailableDeviceIDs: available,
|
||||
MustIncludeDeviceIDs: mustInclude,
|
||||
AllocationSize: int32(size),
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// allocate issues Allocate gRPC call to the device plugin.
|
||||
func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
|
||||
if e.isStopped() {
|
||||
return nil, fmt.Errorf(errEndpointStopped, e)
|
||||
}
|
||||
return e.api.Allocate(context.Background(), &pluginapi.AllocateRequest{
|
||||
ContainerRequests: []*pluginapi.ContainerAllocateRequest{
|
||||
{DevicesIDs: devs},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// preStartContainer issues PreStartContainer gRPC call to the device plugin.
|
||||
func (e *endpointImpl) preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error) {
|
||||
if e.isStopped() {
|
||||
return nil, fmt.Errorf(errEndpointStopped, e)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), pluginapi.KubeletPreStartContainerRPCTimeoutInSecs*time.Second)
|
||||
defer cancel()
|
||||
return e.api.PreStartContainer(ctx, &pluginapi.PreStartContainerRequest{
|
||||
DevicesIDs: devs,
|
||||
})
|
||||
}
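// A standalone sketch, not part of the vendored file above, of a stub that satisfies the
// plugin.DevicePlugin interface consumed by newEndpointImpl. A fake like this is only
// callable from tests inside the devicemanager package, since endpointImpl is
// unexported; all names and paths below are invented.
package main

import (
	api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
	plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
)

type stubDevicePlugin struct {
	client api.DevicePluginClient
}

func (s *stubDevicePlugin) API() api.DevicePluginClient { return s.client }
func (s *stubDevicePlugin) Resource() string            { return "example.com/gpu" }
func (s *stubDevicePlugin) SocketPath() string {
	return "/var/lib/kubelet/device-plugins/example.sock"
}

// Compile-time check that the stub implements the interface.
var _ plugin.DevicePlugin = (*stubDevicePlugin)(nil)

func main() {}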
|
1185
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/manager.go
generated
vendored
Normal file
1185
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/manager.go
generated
vendored
Normal file
File diff suppressed because it is too large
49
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/api.go
generated
vendored
Normal file
49
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/api.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1beta1
|
||||
|
||||
import (
|
||||
api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
)
|
||||
|
||||
// RegistrationHandler is an interface for handling device plugin registration
|
||||
// and plugin directory cleanup.
|
||||
type RegistrationHandler interface {
|
||||
CleanupPluginDirectory(string) error
|
||||
}
|
||||
|
||||
// ClientHandler is an interface for handling device plugin connections.
|
||||
type ClientHandler interface {
|
||||
PluginConnected(string, DevicePlugin) error
|
||||
PluginDisconnected(string)
|
||||
PluginListAndWatchReceiver(string, *api.ListAndWatchResponse)
|
||||
}
|
||||
|
||||
// TODO: evaluate whether we need these error definitions.
|
||||
const (
|
||||
// errFailedToDialDevicePlugin is the error raised when the device plugin could not be
|
||||
// reached on the registered socket
|
||||
errFailedToDialDevicePlugin = "failed to dial device plugin:"
|
||||
// errUnsupportedVersion is the error raised when the device plugin uses an API version not
|
||||
// supported by the Kubelet registry
|
||||
errUnsupportedVersion = "requested API version %q is not supported by kubelet. Supported version is %q"
|
||||
// errInvalidResourceName is the error raised when a device plugin is registering
|
||||
// itself with an invalid ResourceName
|
||||
errInvalidResourceName = "the ResourceName %q is invalid"
|
||||
// errBadSocket is the error raised when the registry socket path is not absolute
|
||||
errBadSocket = "bad socketPath, must be an absolute path:"
|
||||
)
|
143
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/client.go
generated
vendored
Normal file
143
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/client.go
generated
vendored
Normal file
@ -0,0 +1,143 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1beta1
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
)
|
||||
|
||||
// DevicePlugin interface provides methods for accessing Device Plugin resources, API and unix socket.
|
||||
type DevicePlugin interface {
|
||||
API() api.DevicePluginClient
|
||||
Resource() string
|
||||
SocketPath() string
|
||||
}
|
||||
|
||||
// Client interface provides methods for establishing/closing gRPC connection and running the device plugin gRPC client.
|
||||
type Client interface {
|
||||
Connect() error
|
||||
Run()
|
||||
Disconnect() error
|
||||
}
|
||||
|
||||
type client struct {
|
||||
mutex sync.Mutex
|
||||
resource string
|
||||
socket string
|
||||
grpc *grpc.ClientConn
|
||||
handler ClientHandler
|
||||
client api.DevicePluginClient
|
||||
}
|
||||
|
||||
// NewPluginClient returns an initialized device plugin client.
|
||||
func NewPluginClient(r string, socketPath string, h ClientHandler) Client {
|
||||
return &client{
|
||||
resource: r,
|
||||
socket: socketPath,
|
||||
handler: h,
|
||||
}
|
||||
}
|
||||
|
||||
// Connect is for establishing a gRPC connection between device manager and device plugin.
|
||||
func (c *client) Connect() error {
|
||||
client, conn, err := dial(c.socket)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to connect to device plugin client with socket path", "path", c.socket)
|
||||
return err
|
||||
}
|
||||
c.mutex.Lock()
|
||||
c.grpc = conn
|
||||
c.client = client
|
||||
c.mutex.Unlock()
|
||||
return c.handler.PluginConnected(c.resource, c)
|
||||
}
|
||||
|
||||
// Run is for running the device plugin gRPC client.
|
||||
func (c *client) Run() {
|
||||
stream, err := c.client.ListAndWatch(context.Background(), &api.Empty{})
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "ListAndWatch ended unexpectedly for device plugin", "resource", c.resource)
|
||||
return
|
||||
}
|
||||
|
||||
for {
|
||||
response, err := stream.Recv()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "ListAndWatch ended unexpectedly for device plugin", "resource", c.resource)
|
||||
return
|
||||
}
|
||||
klog.V(2).InfoS("State pushed for device plugin", "resource", c.resource, "resourceCapacity", len(response.Devices))
|
||||
c.handler.PluginListAndWatchReceiver(c.resource, response)
|
||||
}
|
||||
}
|
||||
|
||||
// Disconnect is for closing gRPC connection between device manager and device plugin.
|
||||
func (c *client) Disconnect() error {
|
||||
c.mutex.Lock()
|
||||
if c.grpc != nil {
|
||||
if err := c.grpc.Close(); err != nil {
|
||||
klog.V(2).ErrorS(err, "Failed to close grpc connection", "resource", c.Resource())
|
||||
}
|
||||
c.grpc = nil
|
||||
}
|
||||
c.mutex.Unlock()
|
||||
c.handler.PluginDisconnected(c.resource)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *client) Resource() string {
|
||||
return c.resource
|
||||
}
|
||||
|
||||
func (c *client) API() api.DevicePluginClient {
|
||||
return c.client
|
||||
}
|
||||
|
||||
func (c *client) SocketPath() string {
|
||||
return c.socket
|
||||
}
|
||||
|
||||
// dial establishes the gRPC communication with the registered device plugin. https://godoc.org/google.golang.org/grpc#Dial
|
||||
func dial(unixSocketPath string) (api.DevicePluginClient, *grpc.ClientConn, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
c, err := grpc.DialContext(ctx, unixSocketPath,
|
||||
grpc.WithAuthority("localhost"),
|
||||
grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithBlock(),
|
||||
grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
|
||||
return (&net.Dialer{}).DialContext(ctx, "unix", addr)
|
||||
}),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf(errFailedToDialDevicePlugin+" %v", err)
|
||||
}
|
||||
|
||||
return api.NewDevicePluginClient(c), c, nil
|
||||
}
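// A standalone sketch, not part of the vendored file above, wiring the pieces together:
// a minimal ClientHandler plus NewPluginClient/Connect/Run/Disconnect. The resource name
// and socket path are invented, and a real handler would record the devices it receives
// rather than just printing their count.
package main

import (
	"fmt"

	api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
	plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
)

type printingHandler struct{}

func (printingHandler) PluginConnected(resource string, _ plugin.DevicePlugin) error {
	fmt.Println("plugin connected:", resource)
	return nil
}

func (printingHandler) PluginDisconnected(resource string) {
	fmt.Println("plugin disconnected:", resource)
}

func (printingHandler) PluginListAndWatchReceiver(resource string, resp *api.ListAndWatchResponse) {
	fmt.Printf("%s reported %d devices\n", resource, len(resp.Devices))
}

func main() {
	c := plugin.NewPluginClient(
		"example.com/gpu", // hypothetical resource name
		"/var/lib/kubelet/device-plugins/example.sock", // hypothetical plugin socket
		printingHandler{},
	)
	if err := c.Connect(); err != nil {
		fmt.Println("connect failed:", err)
		return
	}
	go c.Run() // ListAndWatch stream; blocks until the plugin closes it

	// ... later, on shutdown:
	_ = c.Disconnect()
}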
|
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/handler.go
generated
vendored
Normal file
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/handler.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1beta1
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
core "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
)
|
||||
|
||||
func (s *server) GetPluginHandler() cache.PluginHandler {
|
||||
if f, err := os.Create(s.socketDir + "DEPRECATION"); err != nil {
|
||||
klog.ErrorS(err, "Failed to create deprecation file at socket dir", "path", s.socketDir)
|
||||
} else {
|
||||
f.Close()
|
||||
klog.V(4).InfoS("Created deprecation file", "path", f.Name())
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *server) RegisterPlugin(pluginName string, endpoint string, versions []string, pluginClientTimeout *time.Duration) error {
|
||||
klog.V(2).InfoS("Registering plugin at endpoint", "plugin", pluginName, "endpoint", endpoint)
|
||||
return s.connectClient(pluginName, endpoint)
|
||||
}
|
||||
|
||||
func (s *server) DeRegisterPlugin(pluginName string) {
|
||||
klog.V(2).InfoS("Deregistering plugin", "plugin", pluginName)
|
||||
client := s.getClient(pluginName)
|
||||
if client != nil {
|
||||
s.disconnectClient(pluginName, client)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) ValidatePlugin(pluginName string, endpoint string, versions []string) error {
|
||||
klog.V(2).InfoS("Got plugin at endpoint with versions", "plugin", pluginName, "endpoint", endpoint, "versions", versions)
|
||||
|
||||
if !s.isVersionCompatibleWithPlugin(versions...) {
|
||||
return fmt.Errorf("manager version, %s, is not among plugin supported versions %v", api.Version, versions)
|
||||
}
|
||||
|
||||
if !v1helper.IsExtendedResourceName(core.ResourceName(pluginName)) {
|
||||
return fmt.Errorf("invalid name of device plugin socket: %s", fmt.Sprintf(errInvalidResourceName, pluginName))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) connectClient(name string, socketPath string) error {
|
||||
c := NewPluginClient(name, socketPath, s.chandler)
|
||||
|
||||
s.registerClient(name, c)
|
||||
if err := c.Connect(); err != nil {
|
||||
s.deregisterClient(name)
|
||||
klog.ErrorS(err, "Failed to connect to new client", "resource", name)
|
||||
return err
|
||||
}
|
||||
|
||||
go func() {
|
||||
s.runClient(name, c)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) disconnectClient(name string, c Client) error {
|
||||
s.deregisterClient(name)
|
||||
return c.Disconnect()
|
||||
}
|
||||
|
||||
func (s *server) registerClient(name string, c Client) {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
s.clients[name] = c
|
||||
klog.V(2).InfoS("Registered client", "name", name)
|
||||
}
|
||||
|
||||
func (s *server) deregisterClient(name string) {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
delete(s.clients, name)
|
||||
klog.V(2).InfoS("Deregistered client", "name", name)
|
||||
}
|
||||
|
||||
func (s *server) runClient(name string, c Client) {
|
||||
c.Run()
|
||||
|
||||
c = s.getClient(name)
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err := s.disconnectClient(name, c); err != nil {
|
||||
klog.V(2).InfoS("Unable to disconnect client", "resource", name, "client", c, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) getClient(name string) Client {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
return s.clients[name]
|
||||
}
|
224
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/server.go
generated
vendored
Normal file
224
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/server.go
generated
vendored
Normal file
@ -0,0 +1,224 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1beta1
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"google.golang.org/grpc"
|
||||
|
||||
core "k8s.io/api/core/v1"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
"k8s.io/klog/v2"
|
||||
api "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
)
|
||||
|
||||
// Server interface provides methods for Device plugin registration server.
|
||||
type Server interface {
|
||||
cache.PluginHandler
|
||||
healthz.HealthChecker
|
||||
Start() error
|
||||
Stop() error
|
||||
SocketPath() string
|
||||
}
|
||||
|
||||
type server struct {
|
||||
socketName string
|
||||
socketDir string
|
||||
mutex sync.Mutex
|
||||
wg sync.WaitGroup
|
||||
grpc *grpc.Server
|
||||
rhandler RegistrationHandler
|
||||
chandler ClientHandler
|
||||
clients map[string]Client
|
||||
|
||||
// isStarted indicates whether the service has started successfully.
|
||||
isStarted bool
|
||||
}
|
||||
|
||||
// NewServer returns an initialized device plugin registration server.
|
||||
func NewServer(socketPath string, rh RegistrationHandler, ch ClientHandler) (Server, error) {
|
||||
if socketPath == "" || !filepath.IsAbs(socketPath) {
|
||||
return nil, fmt.Errorf(errBadSocket+" %s", socketPath)
|
||||
}
|
||||
|
||||
dir, name := filepath.Split(socketPath)
|
||||
|
||||
klog.V(2).InfoS("Creating device plugin registration server", "version", api.Version, "socket", socketPath)
|
||||
s := &server{
|
||||
socketName: name,
|
||||
socketDir: dir,
|
||||
rhandler: rh,
|
||||
chandler: ch,
|
||||
clients: make(map[string]Client),
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *server) Start() error {
|
||||
klog.V(2).InfoS("Starting device plugin registration server")
|
||||
|
||||
if err := os.MkdirAll(s.socketDir, 0750); err != nil {
|
||||
klog.ErrorS(err, "Failed to create the device plugin socket directory", "directory", s.socketDir)
|
||||
return err
|
||||
}
|
||||
|
||||
if selinux.GetEnabled() {
|
||||
if err := selinux.SetFileLabel(s.socketDir, config.KubeletPluginsDirSELinuxLabel); err != nil {
|
||||
klog.InfoS("Unprivileged containerized plugins might not work. Could not set selinux context on socket dir", "path", s.socketDir, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// For now, we leave cleanup of the *entire* directory up to the Handler
|
||||
// (even though we should in theory be able to just wipe the whole directory)
|
||||
// because the Handler stores its checkpoint file (amongst others) in here.
|
||||
if err := s.rhandler.CleanupPluginDirectory(s.socketDir); err != nil {
|
||||
klog.ErrorS(err, "Failed to cleanup the device plugin directory", "directory", s.socketDir)
|
||||
return err
|
||||
}
|
||||
|
||||
ln, err := net.Listen("unix", s.SocketPath())
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to listen to socket while starting device plugin registry")
|
||||
return err
|
||||
}
|
||||
|
||||
s.wg.Add(1)
|
||||
s.grpc = grpc.NewServer([]grpc.ServerOption{}...)
|
||||
|
||||
api.RegisterRegistrationServer(s.grpc, s)
|
||||
go func() {
|
||||
defer s.wg.Done()
|
||||
s.setHealthy()
|
||||
if err = s.grpc.Serve(ln); err != nil {
|
||||
s.setUnhealthy()
|
||||
klog.ErrorS(err, "Error while serving device plugin registration grpc server")
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) Stop() error {
|
||||
s.visitClients(func(r string, c Client) {
|
||||
if err := s.disconnectClient(r, c); err != nil {
|
||||
klog.InfoS("Error disconnecting device plugin client", "resourceName", r, "err", err)
|
||||
}
|
||||
})
|
||||
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
if s.grpc == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
s.grpc.Stop()
|
||||
s.wg.Wait()
|
||||
s.grpc = nil
|
||||
// During kubelet termination, we do not need the registration server,
|
||||
// and we consider the kubelet to be healthy even when it is down.
|
||||
s.setHealthy()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) SocketPath() string {
|
||||
return filepath.Join(s.socketDir, s.socketName)
|
||||
}
|
||||
|
||||
func (s *server) Register(ctx context.Context, r *api.RegisterRequest) (*api.Empty, error) {
|
||||
klog.InfoS("Got registration request from device plugin with resource", "resourceName", r.ResourceName)
|
||||
metrics.DevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
|
||||
|
||||
if !s.isVersionCompatibleWithPlugin(r.Version) {
|
||||
err := fmt.Errorf(errUnsupportedVersion, r.Version, api.SupportedVersions)
|
||||
klog.InfoS("Bad registration request from device plugin with resource", "resourceName", r.ResourceName, "err", err)
|
||||
return &api.Empty{}, err
|
||||
}
|
||||
|
||||
if !v1helper.IsExtendedResourceName(core.ResourceName(r.ResourceName)) {
|
||||
err := fmt.Errorf(errInvalidResourceName, r.ResourceName)
|
||||
klog.InfoS("Bad registration request from device plugin", "err", err)
|
||||
return &api.Empty{}, err
|
||||
}
|
||||
|
||||
if err := s.connectClient(r.ResourceName, filepath.Join(s.socketDir, r.Endpoint)); err != nil {
|
||||
klog.InfoS("Error connecting to device plugin client", "err", err)
|
||||
return &api.Empty{}, err
|
||||
}
|
||||
|
||||
return &api.Empty{}, nil
|
||||
}
|
||||
|
||||
func (s *server) isVersionCompatibleWithPlugin(versions ...string) bool {
|
||||
// TODO(vikasc): Currently this is fine as we only have a single supported version. When we do need to support
|
||||
// multiple versions in the future, we may need to extend this function to return a supported version.
|
||||
// E.g., say kubelet supports v1beta1 and v1beta2, and we get v1alpha1 and v1beta1 from a device plugin,
|
||||
// this function should return v1beta1
|
||||
for _, version := range versions {
|
||||
for _, supportedVersion := range api.SupportedVersions {
|
||||
if version == supportedVersion {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (s *server) visitClients(visit func(r string, c Client)) {
|
||||
s.mutex.Lock()
|
||||
for r, c := range s.clients {
|
||||
s.mutex.Unlock()
|
||||
visit(r, c)
|
||||
s.mutex.Lock()
|
||||
}
|
||||
s.mutex.Unlock()
|
||||
}
|
||||
|
||||
func (s *server) Name() string {
|
||||
return "device-plugin"
|
||||
}
|
||||
|
||||
func (s *server) Check(_ *http.Request) error {
|
||||
if s.isStarted {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("device plugin registration gRPC server failed and no device plugins can register")
|
||||
}
|
||||
|
||||
// setHealthy sets the health status of the gRPC server.
|
||||
func (s *server) setHealthy() {
|
||||
s.isStarted = true
|
||||
}
|
||||
|
||||
// setUnhealthy sets the health status of the gRPC server to unhealthy.
|
||||
func (s *server) setUnhealthy() {
|
||||
s.isStarted = false
|
||||
}
|
388
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/stub.go
generated
vendored
Normal file
388
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1/stub.go
generated
vendored
Normal file
@ -0,0 +1,388 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1beta1
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/klog/v2"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
watcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
|
||||
)
|
||||
|
||||
// Stub implementation for DevicePlugin.
|
||||
type Stub struct {
|
||||
devs []*pluginapi.Device
|
||||
socket string
|
||||
resourceName string
|
||||
preStartContainerFlag bool
|
||||
getPreferredAllocationFlag bool
|
||||
|
||||
stop chan interface{}
|
||||
wg sync.WaitGroup
|
||||
update chan []*pluginapi.Device
|
||||
|
||||
server *grpc.Server
|
||||
|
||||
// allocFunc is used for handling allocation request
|
||||
allocFunc stubAllocFunc
|
||||
|
||||
// getPreferredAllocFunc is used for handling getPreferredAllocation request
|
||||
getPreferredAllocFunc stubGetPreferredAllocFunc
|
||||
|
||||
// registerControlFunc is used for controlling auto-registration of requests
|
||||
registerControlFunc stubRegisterControlFunc
|
||||
|
||||
registrationStatus chan watcherapi.RegistrationStatus // for testing
|
||||
endpoint string // for testing
|
||||
|
||||
kubeletRestartWatcher *fsnotify.Watcher
|
||||
}
|
||||
|
||||
// stubGetPreferredAllocFunc is the function called when a getPreferredAllocation request is received from Kubelet
|
||||
type stubGetPreferredAllocFunc func(r *pluginapi.PreferredAllocationRequest, devs map[string]pluginapi.Device) (*pluginapi.PreferredAllocationResponse, error)
|
||||
|
||||
func defaultGetPreferredAllocFunc(r *pluginapi.PreferredAllocationRequest, devs map[string]pluginapi.Device) (*pluginapi.PreferredAllocationResponse, error) {
|
||||
var response pluginapi.PreferredAllocationResponse
|
||||
|
||||
return &response, nil
|
||||
}
|
||||
|
||||
// stubAllocFunc is the function called when an allocation request is received from Kubelet
|
||||
type stubAllocFunc func(r *pluginapi.AllocateRequest, devs map[string]pluginapi.Device) (*pluginapi.AllocateResponse, error)
|
||||
|
||||
func defaultAllocFunc(r *pluginapi.AllocateRequest, devs map[string]pluginapi.Device) (*pluginapi.AllocateResponse, error) {
|
||||
var response pluginapi.AllocateResponse
|
||||
|
||||
return &response, nil
|
||||
}
|
||||
|
||||
// stubRegisterControlFunc is the function called when a registration request is received from Kubelet
|
||||
type stubRegisterControlFunc func() bool
|
||||
|
||||
func defaultRegisterControlFunc() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// NewDevicePluginStub returns an initialized DevicePlugin Stub.
|
||||
func NewDevicePluginStub(devs []*pluginapi.Device, socket string, name string, preStartContainerFlag bool, getPreferredAllocationFlag bool) *Stub {
|
||||
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Watcher creation failed")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return &Stub{
|
||||
devs: devs,
|
||||
socket: socket,
|
||||
resourceName: name,
|
||||
preStartContainerFlag: preStartContainerFlag,
|
||||
getPreferredAllocationFlag: getPreferredAllocationFlag,
|
||||
registerControlFunc: defaultRegisterControlFunc,
|
||||
|
||||
stop: make(chan interface{}),
|
||||
update: make(chan []*pluginapi.Device),
|
||||
|
||||
allocFunc: defaultAllocFunc,
|
||||
getPreferredAllocFunc: defaultGetPreferredAllocFunc,
|
||||
kubeletRestartWatcher: watcher,
|
||||
}
|
||||
}
|
||||
|
||||
// SetGetPreferredAllocFunc sets allocFunc of the device plugin
|
||||
func (m *Stub) SetGetPreferredAllocFunc(f stubGetPreferredAllocFunc) {
|
||||
m.getPreferredAllocFunc = f
|
||||
}
|
||||
|
||||
// SetAllocFunc sets allocFunc of the device plugin
|
||||
func (m *Stub) SetAllocFunc(f stubAllocFunc) {
|
||||
m.allocFunc = f
|
||||
}
|
||||
|
||||
// SetRegisterControlFunc sets RegisterControlFunc of the device plugin
|
||||
func (m *Stub) SetRegisterControlFunc(f stubRegisterControlFunc) {
|
||||
m.registerControlFunc = f
|
||||
}
|
||||
|
||||
// Start starts the gRPC server of the device plugin. Can only
|
||||
// be called once.
|
||||
func (m *Stub) Start() error {
|
||||
klog.InfoS("Starting device plugin server")
|
||||
err := m.cleanup()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sock, err := net.Listen("unix", m.socket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.wg.Add(1)
|
||||
m.server = grpc.NewServer([]grpc.ServerOption{}...)
|
||||
pluginapi.RegisterDevicePluginServer(m.server, m)
|
||||
watcherapi.RegisterRegistrationServer(m.server, m)
|
||||
|
||||
err = m.kubeletRestartWatcher.Add(filepath.Dir(m.socket))
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to add watch", "devicePluginPath", pluginapi.DevicePluginPath)
|
||||
return err
|
||||
}
|
||||
|
||||
go func() {
|
||||
defer m.wg.Done()
|
||||
if err = m.server.Serve(sock); err != nil {
|
||||
klog.ErrorS(err, "Error while serving device plugin registration grpc server")
|
||||
}
|
||||
}()
|
||||
|
||||
var lastDialErr error
|
||||
wait.PollImmediate(1*time.Second, 10*time.Second, func() (bool, error) {
|
||||
var conn *grpc.ClientConn
|
||||
_, conn, lastDialErr = dial(m.socket)
|
||||
if lastDialErr != nil {
|
||||
return false, nil
|
||||
}
|
||||
conn.Close()
|
||||
return true, nil
|
||||
})
|
||||
if lastDialErr != nil {
|
||||
return lastDialErr
|
||||
}
|
||||
|
||||
klog.InfoS("Starting to serve on socket", "socket", m.socket)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Stub) Restart() error {
|
||||
klog.InfoS("Restarting Device Plugin server")
|
||||
if m.server == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.server.Stop()
|
||||
m.server = nil
|
||||
|
||||
return m.Start()
|
||||
}
|
||||
|
||||
// Stop stops the gRPC server. Can be called without a prior Start
|
||||
// and more than once. Not safe to be called concurrently by different
|
||||
// goroutines!
|
||||
func (m *Stub) Stop() error {
|
||||
klog.InfoS("Stopping device plugin server")
|
||||
if m.server == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.kubeletRestartWatcher.Close()
|
||||
|
||||
m.server.Stop()
|
||||
m.wg.Wait()
|
||||
m.server = nil
|
||||
close(m.stop) // This prevents re-starting the server.
|
||||
|
||||
return m.cleanup()
|
||||
}
|
||||
|
||||
func (m *Stub) Watch(kubeletEndpoint, resourceName, pluginSockDir string) {
|
||||
for {
|
||||
select {
|
||||
// Detect a kubelet restart by watching for a newly created
|
||||
// 'pluginapi.KubeletSocket' file. When this occurs, restart
|
||||
// the device plugin server
|
||||
case event := <-m.kubeletRestartWatcher.Events:
|
||||
if event.Name == kubeletEndpoint && event.Op&fsnotify.Create == fsnotify.Create {
|
||||
klog.InfoS("inotify: file created, restarting", "kubeletEndpoint", kubeletEndpoint)
|
||||
var lastErr error
|
||||
|
||||
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 2*time.Minute, false, func(context.Context) (done bool, err error) {
|
||||
restartErr := m.Restart()
|
||||
if restartErr == nil {
|
||||
return true, nil
|
||||
}
|
||||
klog.ErrorS(restartErr, "Retrying after error")
|
||||
lastErr = restartErr
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to restart server: wait timed out", "lastErr", lastErr.Error())
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if ok := m.registerControlFunc(); ok {
|
||||
if err := m.Register(kubeletEndpoint, resourceName, pluginSockDir); err != nil {
|
||||
klog.ErrorS(err, "Unable to register to kubelet")
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Watch for any other fs errors and log them.
|
||||
case err := <-m.kubeletRestartWatcher.Errors:
|
||||
klog.ErrorS(err, "inotify error")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetInfo is the RPC which return pluginInfo
|
||||
func (m *Stub) GetInfo(ctx context.Context, req *watcherapi.InfoRequest) (*watcherapi.PluginInfo, error) {
|
||||
klog.InfoS("GetInfo")
|
||||
return &watcherapi.PluginInfo{
|
||||
Type: watcherapi.DevicePlugin,
|
||||
Name: m.resourceName,
|
||||
Endpoint: m.endpoint,
|
||||
SupportedVersions: []string{pluginapi.Version}}, nil
|
||||
}
|
||||
|
||||
// NotifyRegistrationStatus receives the registration notification from watcher
|
||||
func (m *Stub) NotifyRegistrationStatus(ctx context.Context, status *watcherapi.RegistrationStatus) (*watcherapi.RegistrationStatusResponse, error) {
|
||||
if m.registrationStatus != nil {
|
||||
m.registrationStatus <- *status
|
||||
}
|
||||
if !status.PluginRegistered {
|
||||
klog.InfoS("Registration failed", "err", status.Error)
|
||||
}
|
||||
return &watcherapi.RegistrationStatusResponse{}, nil
|
||||
}
|
||||
|
||||
// Register registers the device plugin for the given resourceName with Kubelet.
|
||||
func (m *Stub) Register(kubeletEndpoint, resourceName string, pluginSockDir string) error {
|
||||
klog.InfoS("Register", "kubeletEndpoint", kubeletEndpoint, "resourceName", resourceName, "socket", pluginSockDir)
|
||||
|
||||
if pluginSockDir != "" {
|
||||
if _, err := os.Stat(pluginSockDir + "DEPRECATION"); err == nil {
|
||||
klog.InfoS("Deprecation file found. Skip registration")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
klog.InfoS("Deprecation file not found. Invoke registration")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
conn, err := grpc.DialContext(ctx, kubeletEndpoint,
|
||||
grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||
grpc.WithBlock(),
|
||||
grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
|
||||
return (&net.Dialer{}).DialContext(ctx, "unix", addr)
|
||||
}))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pluginapi.NewRegistrationClient(conn)
|
||||
reqt := &pluginapi.RegisterRequest{
|
||||
Version: pluginapi.Version,
|
||||
Endpoint: filepath.Base(m.socket),
|
||||
ResourceName: resourceName,
|
||||
Options: &pluginapi.DevicePluginOptions{
|
||||
PreStartRequired: m.preStartContainerFlag,
|
||||
GetPreferredAllocationAvailable: m.getPreferredAllocationFlag,
|
||||
},
|
||||
}
|
||||
|
||||
_, err = client.Register(context.Background(), reqt)
|
||||
if err != nil {
|
||||
// Stop server
|
||||
m.server.Stop()
|
||||
klog.ErrorS(err, "Client unable to register to kubelet")
|
||||
return err
|
||||
}
|
||||
klog.InfoS("Device Plugin registered with the Kubelet")
|
||||
return err
|
||||
}
|
||||
|
||||
// GetDevicePluginOptions returns DevicePluginOptions settings for the device plugin.
|
||||
func (m *Stub) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
|
||||
options := &pluginapi.DevicePluginOptions{
|
||||
PreStartRequired: m.preStartContainerFlag,
|
||||
GetPreferredAllocationAvailable: m.getPreferredAllocationFlag,
|
||||
}
|
||||
return options, nil
|
||||
}
|
||||
|
||||
// PreStartContainer resets the devices received
|
||||
func (m *Stub) PreStartContainer(ctx context.Context, r *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
|
||||
klog.InfoS("PreStartContainer", "request", r)
|
||||
return &pluginapi.PreStartContainerResponse{}, nil
|
||||
}
|
||||
|
||||
// ListAndWatch lists devices and update that list according to the Update call
|
||||
func (m *Stub) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
|
||||
klog.InfoS("ListAndWatch")
|
||||
|
||||
s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs})
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-m.stop:
|
||||
return nil
|
||||
case updated := <-m.update:
|
||||
s.Send(&pluginapi.ListAndWatchResponse{Devices: updated})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update allows the device plugin to send new devices through ListAndWatch
|
||||
func (m *Stub) Update(devs []*pluginapi.Device) {
|
||||
m.update <- devs
|
||||
}
|
||||
|
||||
// GetPreferredAllocation gets the preferred allocation from a set of available devices
|
||||
func (m *Stub) GetPreferredAllocation(ctx context.Context, r *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
|
||||
klog.InfoS("GetPreferredAllocation", "request", r)
|
||||
|
||||
devs := make(map[string]pluginapi.Device)
|
||||
|
||||
for _, dev := range m.devs {
|
||||
devs[dev.ID] = *dev
|
||||
}
|
||||
|
||||
return m.getPreferredAllocFunc(r, devs)
|
||||
}
|
||||
|
||||
// Allocate does a mock allocation
|
||||
func (m *Stub) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
|
||||
klog.InfoS("Allocate", "request", r)
|
||||
|
||||
devs := make(map[string]pluginapi.Device)
|
||||
|
||||
for _, dev := range m.devs {
|
||||
devs[dev.ID] = *dev
|
||||
}
|
||||
|
||||
return m.allocFunc(r, devs)
|
||||
}
|
||||
|
||||
func (m *Stub) cleanup() error {
|
||||
if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
456
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/pod_devices.go
generated
vendored
Normal file
456
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/pod_devices.go
generated
vendored
Normal file
@ -0,0 +1,456 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package devicemanager
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
type deviceAllocateInfo struct {
|
||||
// deviceIds contains device Ids allocated to this container for the given resourceName.
|
||||
deviceIds checkpoint.DevicesPerNUMA
|
||||
// allocResp contains cached rpc AllocateResponse.
|
||||
allocResp *pluginapi.ContainerAllocateResponse
|
||||
}
|
||||
|
||||
type resourceAllocateInfo map[string]deviceAllocateInfo // Keyed by resourceName.
|
||||
type containerDevices map[string]resourceAllocateInfo // Keyed by containerName.
|
||||
type podDevices struct {
|
||||
sync.RWMutex
|
||||
devs map[string]containerDevices // Keyed by podUID.
|
||||
}
|
||||
|
||||
// NewPodDevices is a function that returns object of podDevices type with its own guard
|
||||
// RWMutex and a map where key is a pod UID and value contains
|
||||
// container devices information of type containerDevices.
|
||||
func newPodDevices() *podDevices {
|
||||
return &podDevices{devs: make(map[string]containerDevices)}
|
||||
}
|
||||
|
||||
func (pdev *podDevices) pods() sets.Set[string] {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
ret := sets.New[string]()
|
||||
for k := range pdev.devs {
|
||||
ret.Insert(k)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (pdev *podDevices) size() int {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
return len(pdev.devs)
|
||||
}
|
||||
|
||||
func (pdev *podDevices) hasPod(podUID string) bool {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
_, podExists := pdev.devs[podUID]
|
||||
return podExists
|
||||
}
|
||||
|
||||
func (pdev *podDevices) insert(podUID, contName, resource string, devices checkpoint.DevicesPerNUMA, resp *pluginapi.ContainerAllocateResponse) {
|
||||
pdev.Lock()
|
||||
defer pdev.Unlock()
|
||||
if _, podExists := pdev.devs[podUID]; !podExists {
|
||||
pdev.devs[podUID] = make(containerDevices)
|
||||
}
|
||||
if _, contExists := pdev.devs[podUID][contName]; !contExists {
|
||||
pdev.devs[podUID][contName] = make(resourceAllocateInfo)
|
||||
}
|
||||
pdev.devs[podUID][contName][resource] = deviceAllocateInfo{
|
||||
deviceIds: devices,
|
||||
allocResp: resp,
|
||||
}
|
||||
}
|
||||
|
||||
func (pdev *podDevices) delete(pods []string) {
|
||||
pdev.Lock()
|
||||
defer pdev.Unlock()
|
||||
for _, uid := range pods {
|
||||
delete(pdev.devs, uid)
|
||||
}
|
||||
}
|
||||
|
||||
// Returns list of device Ids allocated to the given pod for the given resource.
|
||||
// Returns nil if we don't have cached state for the given <podUID, resource>.
|
||||
func (pdev *podDevices) podDevices(podUID, resource string) sets.Set[string] {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
|
||||
ret := sets.New[string]()
|
||||
for contName := range pdev.devs[podUID] {
|
||||
ret = ret.Union(pdev.containerDevices(podUID, contName, resource))
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Returns list of device Ids allocated to the given container for the given resource.
|
||||
// Returns nil if we don't have cached state for the given <podUID, contName, resource>.
|
||||
func (pdev *podDevices) containerDevices(podUID, contName, resource string) sets.Set[string] {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
if _, podExists := pdev.devs[podUID]; !podExists {
|
||||
return nil
|
||||
}
|
||||
if _, contExists := pdev.devs[podUID][contName]; !contExists {
|
||||
return nil
|
||||
}
|
||||
devs, resourceExists := pdev.devs[podUID][contName][resource]
|
||||
if !resourceExists {
|
||||
return nil
|
||||
}
|
||||
return devs.deviceIds.Devices()
|
||||
}
|
||||
|
||||
// Populates allocatedResources with the device resources allocated to the specified <podUID, contName>.
|
||||
func (pdev *podDevices) addContainerAllocatedResources(podUID, contName string, allocatedResources map[string]sets.Set[string]) {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
containers, exists := pdev.devs[podUID]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
resources, exists := containers[contName]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
for resource, devices := range resources {
|
||||
allocatedResources[resource] = allocatedResources[resource].Union(devices.deviceIds.Devices())
|
||||
}
|
||||
}
|
||||
|
||||
// Removes the device resources allocated to the specified <podUID, contName> from allocatedResources.
|
||||
func (pdev *podDevices) removeContainerAllocatedResources(podUID, contName string, allocatedResources map[string]sets.Set[string]) {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
containers, exists := pdev.devs[podUID]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
resources, exists := containers[contName]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
for resource, devices := range resources {
|
||||
allocatedResources[resource] = allocatedResources[resource].Difference(devices.deviceIds.Devices())
|
||||
}
|
||||
}
|
||||
|
||||
// Returns all devices allocated to the pods being tracked, keyed by resourceName.
|
||||
func (pdev *podDevices) devices() map[string]sets.Set[string] {
|
||||
ret := make(map[string]sets.Set[string])
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
for _, containerDevices := range pdev.devs {
|
||||
for _, resources := range containerDevices {
|
||||
for resource, devices := range resources {
|
||||
if _, exists := ret[resource]; !exists {
|
||||
ret[resource] = sets.New[string]()
|
||||
}
|
||||
if devices.allocResp != nil {
|
||||
ret[resource] = ret[resource].Union(devices.deviceIds.Devices())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Returns podUID and containerName for a device
|
||||
func (pdev *podDevices) getPodAndContainerForDevice(deviceID string) (string, string) {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
for podUID, containerDevices := range pdev.devs {
|
||||
for containerName, resources := range containerDevices {
|
||||
for _, devices := range resources {
|
||||
if devices.deviceIds.Devices().Has(deviceID) {
|
||||
return podUID, containerName
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// Turns podDevices to checkpointData.
|
||||
func (pdev *podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
|
||||
var data []checkpoint.PodDevicesEntry
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
for podUID, containerDevices := range pdev.devs {
|
||||
for conName, resources := range containerDevices {
|
||||
for resource, devices := range resources {
|
||||
if devices.allocResp == nil {
|
||||
klog.ErrorS(nil, "Can't marshal allocResp, allocation response is missing", "podUID", podUID, "containerName", conName, "resourceName", resource)
|
||||
continue
|
||||
}
|
||||
|
||||
allocResp, err := devices.allocResp.Marshal()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Can't marshal allocResp", "podUID", podUID, "containerName", conName, "resourceName", resource)
|
||||
continue
|
||||
}
|
||||
data = append(data, checkpoint.PodDevicesEntry{
|
||||
PodUID: podUID,
|
||||
ContainerName: conName,
|
||||
ResourceName: resource,
|
||||
DeviceIDs: devices.deviceIds,
|
||||
AllocResp: allocResp})
|
||||
}
|
||||
}
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
// Populates podDevices from the passed in checkpointData.
|
||||
func (pdev *podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
|
||||
for _, entry := range data {
|
||||
klog.V(2).InfoS("Get checkpoint entry",
|
||||
"podUID", entry.PodUID, "containerName", entry.ContainerName,
|
||||
"resourceName", entry.ResourceName, "deviceIDs", entry.DeviceIDs, "allocated", entry.AllocResp)
|
||||
allocResp := &pluginapi.ContainerAllocateResponse{}
|
||||
err := allocResp.Unmarshal(entry.AllocResp)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Can't unmarshal allocResp", "podUID", entry.PodUID, "containerName", entry.ContainerName, "resourceName", entry.ResourceName)
|
||||
continue
|
||||
}
|
||||
pdev.insert(entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, allocResp)
|
||||
}
|
||||
}
|
||||
|
||||
// Returns combined container runtime settings to consume the container's allocated devices.
|
||||
func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *DeviceRunContainerOptions {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
|
||||
containers, exists := pdev.devs[podUID]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
resources, exists := containers[contName]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
opts := &DeviceRunContainerOptions{}
|
||||
// Maps to detect duplicate settings.
|
||||
devsMap := make(map[string]string)
|
||||
mountsMap := make(map[string]string)
|
||||
envsMap := make(map[string]string)
|
||||
annotationsMap := make(map[string]string)
|
||||
// Keep track of all CDI devices requested for the container.
|
||||
allCDIDevices := sets.New[string]()
|
||||
// Loops through AllocationResponses of all cached device resources.
|
||||
for _, devices := range resources {
|
||||
resp := devices.allocResp
|
||||
// Each Allocate response has the following artifacts.
|
||||
// Environment variables
|
||||
// Mount points
|
||||
// Device files
|
||||
// Container annotations
|
||||
// CDI device IDs
|
||||
// These artifacts are per resource per container.
|
||||
// Updates RunContainerOptions.Envs.
|
||||
for k, v := range resp.Envs {
|
||||
if e, ok := envsMap[k]; ok {
|
||||
klog.V(4).InfoS("Skip existing env", "envKey", k, "envValue", v)
|
||||
if e != v {
|
||||
klog.ErrorS(nil, "Environment variable has conflicting setting", "envKey", k, "expected", v, "got", e)
|
||||
}
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add env", "envKey", k, "envValue", v)
|
||||
envsMap[k] = v
|
||||
opts.Envs = append(opts.Envs, kubecontainer.EnvVar{Name: k, Value: v})
|
||||
}
|
||||
|
||||
// Updates RunContainerOptions.Devices.
|
||||
for _, dev := range resp.Devices {
|
||||
if d, ok := devsMap[dev.ContainerPath]; ok {
|
||||
klog.V(4).InfoS("Skip existing device", "containerPath", dev.ContainerPath, "hostPath", dev.HostPath)
|
||||
if d != dev.HostPath {
|
||||
klog.ErrorS(nil, "Container device has conflicting mapping host devices",
|
||||
"containerPath", dev.ContainerPath, "got", d, "expected", dev.HostPath)
|
||||
}
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add device", "containerPath", dev.ContainerPath, "hostPath", dev.HostPath)
|
||||
devsMap[dev.ContainerPath] = dev.HostPath
|
||||
opts.Devices = append(opts.Devices, kubecontainer.DeviceInfo{
|
||||
PathOnHost: dev.HostPath,
|
||||
PathInContainer: dev.ContainerPath,
|
||||
Permissions: dev.Permissions,
|
||||
})
|
||||
}
|
||||
|
||||
// Updates RunContainerOptions.Mounts.
|
||||
for _, mount := range resp.Mounts {
|
||||
if m, ok := mountsMap[mount.ContainerPath]; ok {
|
||||
klog.V(4).InfoS("Skip existing mount", "containerPath", mount.ContainerPath, "hostPath", mount.HostPath)
|
||||
if m != mount.HostPath {
|
||||
klog.ErrorS(nil, "Container mount has conflicting mapping host mounts",
|
||||
"containerPath", mount.ContainerPath, "conflictingPath", m, "hostPath", mount.HostPath)
|
||||
}
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add mount", "containerPath", mount.ContainerPath, "hostPath", mount.HostPath)
|
||||
mountsMap[mount.ContainerPath] = mount.HostPath
|
||||
opts.Mounts = append(opts.Mounts, kubecontainer.Mount{
|
||||
Name: mount.ContainerPath,
|
||||
ContainerPath: mount.ContainerPath,
|
||||
HostPath: mount.HostPath,
|
||||
ReadOnly: mount.ReadOnly,
|
||||
// TODO: This may need to be part of Device plugin API.
|
||||
SELinuxRelabel: false,
|
||||
})
|
||||
}
|
||||
|
||||
// Updates for Annotations
|
||||
for k, v := range resp.Annotations {
|
||||
if e, ok := annotationsMap[k]; ok {
|
||||
klog.V(4).InfoS("Skip existing annotation", "annotationKey", k, "annotationValue", v)
|
||||
if e != v {
|
||||
klog.ErrorS(nil, "Annotation has conflicting setting", "annotationKey", k, "expected", e, "got", v)
|
||||
}
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add annotation", "annotationKey", k, "annotationValue", v)
|
||||
annotationsMap[k] = v
|
||||
opts.Annotations = append(opts.Annotations, kubecontainer.Annotation{Name: k, Value: v})
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DevicePluginCDIDevices) {
|
||||
// Updates for CDI devices.
|
||||
cdiDevices := getCDIDeviceInfo(resp, allCDIDevices)
|
||||
opts.CDIDevices = append(opts.CDIDevices, cdiDevices...)
|
||||
}
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
// getCDIDeviceInfo returns CDI devices from an allocate response
|
||||
func getCDIDeviceInfo(resp *pluginapi.ContainerAllocateResponse, knownCDIDevices sets.Set[string]) []kubecontainer.CDIDevice {
|
||||
var cdiDevices []kubecontainer.CDIDevice
|
||||
for _, cdiDevice := range resp.CDIDevices {
|
||||
if knownCDIDevices.Has(cdiDevice.Name) {
|
||||
klog.V(4).InfoS("Skip existing CDI Device", "name", cdiDevice.Name)
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add CDI device", "name", cdiDevice.Name)
|
||||
knownCDIDevices.Insert(cdiDevice.Name)
|
||||
|
||||
device := kubecontainer.CDIDevice{
|
||||
Name: cdiDevice.Name,
|
||||
}
|
||||
cdiDevices = append(cdiDevices, device)
|
||||
}
|
||||
|
||||
return cdiDevices
|
||||
}
|
||||
|
||||
// getContainerDevices returns the devices assigned to the provided container for all ResourceNames
|
||||
func (pdev *podDevices) getContainerDevices(podUID, contName string) ResourceDeviceInstances {
|
||||
pdev.RLock()
|
||||
defer pdev.RUnlock()
|
||||
|
||||
if _, podExists := pdev.devs[podUID]; !podExists {
|
||||
return nil
|
||||
}
|
||||
if _, contExists := pdev.devs[podUID][contName]; !contExists {
|
||||
return nil
|
||||
}
|
||||
resDev := NewResourceDeviceInstances()
|
||||
for resource, allocateInfo := range pdev.devs[podUID][contName] {
|
||||
if len(allocateInfo.deviceIds) == 0 {
|
||||
continue
|
||||
}
|
||||
devicePluginMap := make(map[string]pluginapi.Device)
|
||||
for numaid, devlist := range allocateInfo.deviceIds {
|
||||
for _, devID := range devlist {
|
||||
var topology *pluginapi.TopologyInfo
|
||||
if numaid != nodeWithoutTopology {
|
||||
NUMANodes := []*pluginapi.NUMANode{{ID: numaid}}
|
||||
if pDev, ok := devicePluginMap[devID]; ok && pDev.Topology != nil {
|
||||
if nodes := pDev.Topology.GetNodes(); nodes != nil {
|
||||
NUMANodes = append(NUMANodes, nodes...)
|
||||
}
|
||||
}
|
||||
|
||||
// ID and Healthy are not relevant here.
|
||||
topology = &pluginapi.TopologyInfo{Nodes: NUMANodes}
|
||||
}
|
||||
devicePluginMap[devID] = pluginapi.Device{
|
||||
Topology: topology,
|
||||
}
|
||||
}
|
||||
}
|
||||
resDev[resource] = devicePluginMap
|
||||
}
|
||||
return resDev
|
||||
}
|
||||
|
||||
// DeviceInstances is a mapping device name -> plugin device data
|
||||
type DeviceInstances map[string]pluginapi.Device
|
||||
|
||||
// ResourceDeviceInstances is a mapping resource name -> DeviceInstances
|
||||
type ResourceDeviceInstances map[string]DeviceInstances
|
||||
|
||||
// NewResourceDeviceInstances returns a new ResourceDeviceInstances
|
||||
func NewResourceDeviceInstances() ResourceDeviceInstances {
|
||||
return make(ResourceDeviceInstances)
|
||||
}
|
||||
|
||||
// Clone returns a clone of ResourceDeviceInstances
|
||||
func (rdev ResourceDeviceInstances) Clone() ResourceDeviceInstances {
|
||||
clone := NewResourceDeviceInstances()
|
||||
for resourceName, resourceDevs := range rdev {
|
||||
clone[resourceName] = make(map[string]pluginapi.Device)
|
||||
for devID, dev := range resourceDevs {
|
||||
clone[resourceName][devID] = dev
|
||||
}
|
||||
}
|
||||
return clone
|
||||
}
|
||||
|
||||
// Filter takes a condition set expressed as map[string]sets.Set[string] and returns a new
|
||||
// ResourceDeviceInstances with only the devices matching the condition set.
|
||||
func (rdev ResourceDeviceInstances) Filter(cond map[string]sets.Set[string]) ResourceDeviceInstances {
|
||||
filtered := NewResourceDeviceInstances()
|
||||
for resourceName, filterIDs := range cond {
|
||||
if _, exists := rdev[resourceName]; !exists {
|
||||
continue
|
||||
}
|
||||
filtered[resourceName] = DeviceInstances{}
|
||||
for instanceID, instance := range rdev[resourceName] {
|
||||
if filterIDs.Has(instanceID) {
|
||||
filtered[resourceName][instanceID] = instance
|
||||
}
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
252
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/topology_hints.go
generated
vendored
Normal file
252
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/topology_hints.go
generated
vendored
Normal file
@ -0,0 +1,252 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package devicemanager
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/component-helpers/resource"
|
||||
"k8s.io/klog/v2"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
|
||||
// GetTopologyHints implements the TopologyManager HintProvider Interface which
|
||||
// ensures the Device Manager is consulted when Topology Aware Hints for each
|
||||
// container are created.
|
||||
func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
// Garbage collect any stranded device resources before providing TopologyHints
|
||||
m.UpdateAllocatedDevices()
|
||||
|
||||
// Loop through all device resources and generate TopologyHints for them.
|
||||
deviceHints := make(map[string][]topologymanager.TopologyHint)
|
||||
accumulatedResourceRequests := m.getContainerDeviceRequest(container)
|
||||
|
||||
m.mutex.Lock()
|
||||
defer m.mutex.Unlock()
|
||||
for resource, requested := range accumulatedResourceRequests {
|
||||
// Only consider devices that actually contain topology information.
|
||||
if aligned := m.deviceHasTopologyAlignment(resource); !aligned {
|
||||
klog.InfoS("Resource does not have a topology preference", "resource", resource)
|
||||
deviceHints[resource] = nil
|
||||
continue
|
||||
}
|
||||
|
||||
// Short circuit to regenerate the same hints if there are already
|
||||
// devices allocated to the Container. This might happen after a
|
||||
// kubelet restart, for example.
|
||||
allocated := m.podDevices.containerDevices(string(pod.UID), container.Name, resource)
|
||||
if allocated.Len() > 0 {
|
||||
if allocated.Len() != requested {
|
||||
klog.ErrorS(nil, "Resource already allocated to pod with different number than request", "resource", resource, "pod", klog.KObj(pod), "containerName", container.Name, "request", requested, "allocated", allocated.Len())
|
||||
deviceHints[resource] = []topologymanager.TopologyHint{}
|
||||
continue
|
||||
}
|
||||
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resource", resource, "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.Set[string]{}, requested)
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the list of available devices, for which TopologyHints should be generated.
|
||||
available := m.getAvailableDevices(resource)
|
||||
reusable := m.devicesToReuse[string(pod.UID)][resource]
|
||||
if available.Union(reusable).Len() < requested {
|
||||
klog.ErrorS(nil, "Unable to generate topology hints: requested number of devices unavailable", "resource", resource, "request", requested, "available", available.Union(reusable).Len())
|
||||
deviceHints[resource] = []topologymanager.TopologyHint{}
|
||||
continue
|
||||
}
|
||||
|
||||
// Generate TopologyHints for this resource given the current
|
||||
// request size and the list of available devices.
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested)
|
||||
}
|
||||
|
||||
return deviceHints
|
||||
}
|
||||
|
||||
// GetPodTopologyHints implements the topologymanager.HintProvider Interface which
|
||||
// ensures the Device Manager is consulted when Topology Aware Hints for Pod are created.
|
||||
func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
// Garbage collect any stranded device resources before providing TopologyHints
|
||||
m.UpdateAllocatedDevices()
|
||||
|
||||
deviceHints := make(map[string][]topologymanager.TopologyHint)
|
||||
accumulatedResourceRequests := m.getPodDeviceRequest(pod)
|
||||
|
||||
m.mutex.Lock()
|
||||
defer m.mutex.Unlock()
|
||||
for resource, requested := range accumulatedResourceRequests {
|
||||
// Only consider devices that actually contain topology information.
|
||||
if aligned := m.deviceHasTopologyAlignment(resource); !aligned {
|
||||
klog.InfoS("Resource does not have a topology preference", "resource", resource)
|
||||
deviceHints[resource] = nil
|
||||
continue
|
||||
}
|
||||
|
||||
// Short circuit to regenerate the same hints if there are already
|
||||
// devices allocated to the Pod. This might happen after a
|
||||
// kubelet restart, for example.
|
||||
allocated := m.podDevices.podDevices(string(pod.UID), resource)
|
||||
if allocated.Len() > 0 {
|
||||
if allocated.Len() != requested {
|
||||
klog.ErrorS(nil, "Resource already allocated to pod with different number than request", "resource", resource, "pod", klog.KObj(pod), "request", requested, "allocated", allocated.Len())
|
||||
deviceHints[resource] = []topologymanager.TopologyHint{}
|
||||
continue
|
||||
}
|
||||
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resource", resource, "pod", klog.KObj(pod))
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.Set[string]{}, requested)
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the list of available devices, for which TopologyHints should be generated.
|
||||
available := m.getAvailableDevices(resource)
|
||||
if available.Len() < requested {
|
||||
klog.ErrorS(nil, "Unable to generate topology hints: requested number of devices unavailable", "resource", resource, "request", requested, "available", available.Len())
|
||||
deviceHints[resource] = []topologymanager.TopologyHint{}
|
||||
continue
|
||||
}
|
||||
|
||||
// Generate TopologyHints for this resource given the current
|
||||
// request size and the list of available devices.
|
||||
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, sets.Set[string]{}, requested)
|
||||
}
|
||||
|
||||
return deviceHints
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) deviceHasTopologyAlignment(resource string) bool {
|
||||
// If any device has Topology NUMANodes available, we assume they care about alignment.
|
||||
for _, device := range m.allDevices[resource] {
|
||||
if device.Topology != nil && len(device.Topology.Nodes) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) getAvailableDevices(resource string) sets.Set[string] {
|
||||
// Strip all devices in use from the list of healthy ones.
|
||||
return m.healthyDevices[resource].Difference(m.allocatedDevices[resource])
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.Set[string], reusable sets.Set[string], request int) []topologymanager.TopologyHint {
|
||||
// Initialize minAffinitySize to include all NUMA Nodes
|
||||
minAffinitySize := len(m.numaNodes)
|
||||
|
||||
// Iterate through all combinations of NUMA Nodes and build hints from them.
|
||||
hints := []topologymanager.TopologyHint{}
|
||||
bitmask.IterateBitMasks(m.numaNodes, func(mask bitmask.BitMask) {
|
||||
// First, update minAffinitySize for the current request size.
|
||||
devicesInMask := 0
|
||||
for _, device := range m.allDevices[resource] {
|
||||
if mask.AnySet(m.getNUMANodeIds(device.Topology)) {
|
||||
devicesInMask++
|
||||
}
|
||||
}
|
||||
if devicesInMask >= request && mask.Count() < minAffinitySize {
|
||||
minAffinitySize = mask.Count()
|
||||
}
|
||||
|
||||
// Then check to see if all the reusable devices are part of the bitmask.
|
||||
numMatching := 0
|
||||
for d := range reusable {
|
||||
// Skip the device if it doesn't specify any topology info.
|
||||
if m.allDevices[resource][d].Topology == nil {
|
||||
continue
|
||||
}
|
||||
// Otherwise disregard this mask if its NUMANode isn't part of it.
|
||||
if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
|
||||
return
|
||||
}
|
||||
numMatching++
|
||||
}
|
||||
|
||||
// Finally, check to see if enough available devices remain on the
|
||||
// current NUMA node combination to satisfy the device request.
|
||||
for d := range available {
|
||||
if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
|
||||
numMatching++
|
||||
}
|
||||
}
|
||||
|
||||
// If they don't, then move onto the next combination.
|
||||
if numMatching < request {
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, create a new hint from the NUMA mask and add it to the
|
||||
// list of hints. We set all hint preferences to 'false' on the first
|
||||
// pass through.
|
||||
hints = append(hints, topologymanager.TopologyHint{
|
||||
NUMANodeAffinity: mask,
|
||||
Preferred: false,
|
||||
})
|
||||
})
|
||||
|
||||
// Loop back through all hints and update the 'Preferred' field based on
|
||||
// counting the number of bits sets in the affinity mask and comparing it
|
||||
// to the minAffinity. Only those with an equal number of bits set will be
|
||||
// considered preferred.
|
||||
for i := range hints {
|
||||
if hints[i].NUMANodeAffinity.Count() == minAffinitySize {
|
||||
hints[i].Preferred = true
|
||||
}
|
||||
}
|
||||
|
||||
return hints
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) getNUMANodeIds(topology *pluginapi.TopologyInfo) []int {
|
||||
if topology == nil {
|
||||
return nil
|
||||
}
|
||||
var ids []int
|
||||
for _, n := range topology.Nodes {
|
||||
ids = append(ids, int(n.ID))
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) getPodDeviceRequest(pod *v1.Pod) map[string]int {
|
||||
// for these device plugin resources, requests == limits
|
||||
limits := resource.PodLimits(pod, resource.PodResourcesOptions{
|
||||
ExcludeOverhead: true,
|
||||
})
|
||||
podRequests := make(map[string]int)
|
||||
for resourceName, quantity := range limits {
|
||||
if !m.isDevicePluginResource(string(resourceName)) {
|
||||
continue
|
||||
}
|
||||
podRequests[string(resourceName)] = int(quantity.Value())
|
||||
}
|
||||
return podRequests
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) getContainerDeviceRequest(container *v1.Container) map[string]int {
|
||||
containerRequests := make(map[string]int)
|
||||
for resourceObj, requestedObj := range container.Resources.Limits {
|
||||
resource := string(resourceObj)
|
||||
requested := int(requestedObj.Value())
|
||||
if !m.isDevicePluginResource(resource) {
|
||||
continue
|
||||
}
|
||||
containerRequests[resource] = requested
|
||||
}
|
||||
return containerRequests
|
||||
}
|
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/types.go
generated
vendored
Normal file
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/types.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package devicemanager

import (
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/cm/resourceupdates"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// Manager manages all the Device Plugins running on a node.
type Manager interface {
// Start starts device plugin registration service.
Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, initialContainers containermap.ContainerMap, initialContainerRunningSet sets.Set[string]) error

// Allocate configures and assigns devices to a container in a pod. From
// the requested device resources, Allocate will communicate with the
// owning device plugin to allow setup procedures to take place, and for
// the device plugin to provide runtime settings to use the device
// (environment variables, mount points and device files).
Allocate(pod *v1.Pod, container *v1.Container) error

// UpdatePluginResources updates node resources based on devices already
// allocated to pods. The node object is provided for the device manager to
// update the node capacity to reflect the currently available devices.
UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

// Stop stops the manager.
Stop() error

// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error)

// GetCapacity returns the amount of available device plugin resource capacity, resource allocatable
// and inactive device plugin resources previously registered on the node.
GetCapacity() (v1.ResourceList, v1.ResourceList, []string)

// GetWatcherHandler returns the plugin handler for the device manager.
GetWatcherHandler() cache.PluginHandler
GetHealthChecker() healthz.HealthChecker

// GetDevices returns information about the devices assigned to pods and containers.
GetDevices(podUID, containerName string) ResourceDeviceInstances

// UpdateAllocatedResourcesStatus updates the status of allocated resources for the pod.
UpdateAllocatedResourcesStatus(pod *v1.Pod, status *v1.PodStatus)

// GetAllocatableDevices returns information about all the devices known to the manager.
GetAllocatableDevices() ResourceDeviceInstances

// ShouldResetExtendedResourceCapacity returns whether the extended resources should be reset or not,
// depending on the checkpoint file availability. Absence of the checkpoint file strongly indicates
// the node has been recreated.
ShouldResetExtendedResourceCapacity() bool

// TopologyManager HintProvider indicates the Device Manager implements the Topology Manager Interface
// and is consulted to make Topology aware resource alignments
GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint

// TopologyManager HintProvider indicates the Device Manager implements the Topology Manager Interface
// and is consulted to make Topology aware resource alignments per Pod
GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint

// UpdateAllocatedDevices frees any Devices that are bound to terminated pods.
UpdateAllocatedDevices()

// Updates returns a channel that receives an Update when the device changed its status.
Updates() <-chan resourceupdates.Update
}

// DeviceRunContainerOptions contains the combined container runtime settings to consume its allocated devices.
type DeviceRunContainerOptions struct {
// The environment variables list.
Envs []kubecontainer.EnvVar
// The mounts for the container.
Mounts []kubecontainer.Mount
// The host devices mapped into the container.
Devices []kubecontainer.DeviceInfo
// The Annotations for the container
Annotations []kubecontainer.Annotation
// CDI Devices for the container
CDIDevices []kubecontainer.CDIDevice
}

// TODO: evaluate whether we need this error definition.
const (
errEndpointStopped = "endpoint %v has been stopped"
)

// endpointStopGracePeriod indicates the grace period after an endpoint is stopped
// because its device plugin fails. DeviceManager keeps the stopped endpoint in its
// cache during this grace period to cover the time gap for the capacity change to
// take effect.
const endpointStopGracePeriod = time.Duration(5) * time.Minute

// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
const kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"
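// Illustrative sketch, not part of the vendored file: a hypothetical caller
// driving the Manager interface above at pod admission time. The function,
// the dm value, and the pod/container arguments are assumptions; only the
// method names and signatures come from the interface itself.
package devicemanagerexample

import (
	v1 "k8s.io/api/core/v1"

	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
)

func admitAndConfigure(dm devicemanager.Manager, pod *v1.Pod, container *v1.Container) error {
	// Ask the owning device plugins to set up the requested devices.
	if err := dm.Allocate(pod, container); err != nil {
		return err
	}
	// Retrieve the cached runtime settings (env vars, mounts, device nodes,
	// annotations, CDI devices) produced by the allocation.
	opts, err := dm.GetDeviceRunContainerOptions(pod, container)
	if err != nil {
		return err
	}
	_ = opts // merged into the container runtime config by the caller
	return nil
}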
21
vendor/k8s.io/kubernetes/pkg/kubelet/cm/doc.go
generated
vendored
Normal file
21
vendor/k8s.io/kubernetes/pkg/kubelet/cm/doc.go
generated
vendored
Normal file
@ -0,0 +1,21 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package cm (abbreviation of "container manager") and its subpackages contain all the kubelet code
// to manage containers. For example, they contain functions to configure containers' cgroups,
// ensure containers run with the desired QoS, and allocate compute resources like cpus, memory,
// devices...
package cm // import "k8s.io/kubernetes/pkg/kubelet/cm"
2
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/OWNERS
generated
vendored
Normal file
2
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/OWNERS
generated
vendored
Normal file
@ -0,0 +1,2 @@
labels:
- wg/device-management
222
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/claiminfo.go
generated
vendored
Normal file
222
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/claiminfo.go
generated
vendored
Normal file
@ -0,0 +1,222 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
"errors"
"fmt"
"slices"
"sync"

resourceapi "k8s.io/api/resource/v1beta1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// ClaimInfo holds information required
// to prepare and unprepare a resource claim.
// +k8s:deepcopy-gen=true
type ClaimInfo struct {
state.ClaimInfoState
prepared bool
}

// claimInfoCache is a cache of processed resource claims keyed by namespace/claimname.
type claimInfoCache struct {
sync.RWMutex
checkpointer state.Checkpointer
claimInfo map[string]*ClaimInfo
}

// newClaimInfoFromClaim creates a new claim info from a resource claim.
// It verifies that the kubelet can handle the claim.
func newClaimInfoFromClaim(claim *resourceapi.ResourceClaim) (*ClaimInfo, error) {
claimInfoState := state.ClaimInfoState{
ClaimUID: claim.UID,
ClaimName: claim.Name,
Namespace: claim.Namespace,
PodUIDs: sets.New[string](),
DriverState: make(map[string]state.DriverState),
}
if claim.Status.Allocation == nil {
return nil, errors.New("not allocated")
}
for _, result := range claim.Status.Allocation.Devices.Results {
claimInfoState.DriverState[result.Driver] = state.DriverState{}
}
info := &ClaimInfo{
ClaimInfoState: claimInfoState,
prepared: false,
}
return info, nil
}

// newClaimInfoFromState creates a new claim info from a checkpointed claim info state object.
func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
info := &ClaimInfo{
ClaimInfoState: *state.DeepCopy(),
prepared: false,
}
return info
}

// addDevice adds a device state for the given driver to the claim info.
func (info *ClaimInfo) addDevice(driverName string, deviceState state.Device) {
if info.DriverState == nil {
info.DriverState = make(map[string]state.DriverState)
}
driverState := info.DriverState[driverName]
driverState.Devices = append(driverState.Devices, deviceState)
info.DriverState[driverName] = driverState
}

// addPodReference adds a pod reference to the claim info.
func (info *ClaimInfo) addPodReference(podUID types.UID) {
info.PodUIDs.Insert(string(podUID))
}

// hasPodReference checks if a pod reference exists in the claim info.
func (info *ClaimInfo) hasPodReference(podUID types.UID) bool {
return info.PodUIDs.Has(string(podUID))
}

// deletePodReference deletes a pod reference from the claim info.
func (info *ClaimInfo) deletePodReference(podUID types.UID) {
info.PodUIDs.Delete(string(podUID))
}

// setPrepared marks the claim info as prepared.
func (info *ClaimInfo) setPrepared() {
info.prepared = true
}

// isPrepared checks if claim info is prepared or not.
func (info *ClaimInfo) isPrepared() bool {
return info.prepared
}

// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
checkpointer, err := state.NewCheckpointer(stateDir, checkpointName)
if err != nil {
return nil, fmt.Errorf("could not initialize checkpoint manager, please drain node and remove dra state file, err: %w", err)
}

checkpoint, err := checkpointer.GetOrCreate()
if err != nil {
return nil, fmt.Errorf("error calling GetOrCreate() on checkpoint state: %w", err)
}

cache := &claimInfoCache{
checkpointer: checkpointer,
claimInfo: make(map[string]*ClaimInfo),
}

entries, err := checkpoint.GetClaimInfoStateList()
if err != nil {
return nil, fmt.Errorf("error calling GetEntries() on checkpoint: %w", err)
}
for _, entry := range entries {
info := newClaimInfoFromState(&entry)
cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
}

return cache, nil
}

// withLock runs a function while holding the claimInfoCache lock.
func (cache *claimInfoCache) withLock(f func() error) error {
cache.Lock()
defer cache.Unlock()
return f()
}

// withRLock runs a function while holding the claimInfoCache rlock.
func (cache *claimInfoCache) withRLock(f func() error) error {
cache.RLock()
defer cache.RUnlock()
return f()
}

// add adds a new claim info object into the claim info cache.
func (cache *claimInfoCache) add(info *ClaimInfo) *ClaimInfo {
cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
return info
}

// contains checks to see if a specific claim info object is already in the cache.
func (cache *claimInfoCache) contains(claimName, namespace string) bool {
_, exists := cache.claimInfo[namespace+"/"+claimName]
return exists
}

// get gets a specific claim info object from the cache.
func (cache *claimInfoCache) get(claimName, namespace string) (*ClaimInfo, bool) {
info, exists := cache.claimInfo[namespace+"/"+claimName]
return info, exists
}

// delete deletes a specific claim info object from the cache.
func (cache *claimInfoCache) delete(claimName, namespace string) {
delete(cache.claimInfo, namespace+"/"+claimName)
}

// hasPodReference checks if there is at least one claim
// that is referenced by the pod with the given UID.
// This function is used indirectly by the status manager
// to check if a pod can enter termination status.
func (cache *claimInfoCache) hasPodReference(uid types.UID) bool {
for _, claimInfo := range cache.claimInfo {
if claimInfo.hasPodReference(uid) {
return true
}
}
return false
}

// syncToCheckpoint syncs the full claim info cache state to a checkpoint.
func (cache *claimInfoCache) syncToCheckpoint() error {
claimInfoStateList := make(state.ClaimInfoStateList, 0, len(cache.claimInfo))
for _, infoClaim := range cache.claimInfo {
claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
}
checkpoint, err := state.NewCheckpoint(claimInfoStateList)
if err != nil {
return err
}
return cache.checkpointer.Store(checkpoint)
}

// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
// When the request name is non-empty, only devices relevant for that request
// are returned.
func (info *ClaimInfo) cdiDevicesAsList(requestName string) []kubecontainer.CDIDevice {
var cdiDevices []kubecontainer.CDIDevice
for _, driverData := range info.DriverState {
for _, device := range driverData.Devices {
if requestName == "" || len(device.RequestNames) == 0 || slices.Contains(device.RequestNames, requestName) {
for _, cdiDeviceID := range device.CDIDeviceIDs {
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: cdiDeviceID})
}
}
}
}
return cdiDevices
}
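// Illustrative sketch, not part of the vendored file: how the unexported cache
// helpers above compose. It assumes same-package access (package dra) and a
// hypothetical claim value; the pattern mirrors the manager code in manager.go.
func exampleTrackClaim(cache *claimInfoCache, claim *resourceapi.ResourceClaim, podUID types.UID) error {
	return cache.withLock(func() error {
		info, exists := cache.get(claim.Name, claim.Namespace)
		if !exists {
			ci, err := newClaimInfoFromClaim(claim)
			if err != nil {
				return err
			}
			info = cache.add(ci)
		}
		info.addPodReference(podUID)
		// Persist the updated cache so a restarted kubelet can rebuild it.
		return cache.syncToCheckpoint()
	})
}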
553
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/manager.go
generated
vendored
Normal file
553
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/manager.go
generated
vendored
Normal file
@ -0,0 +1,553 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
"context"
"fmt"
"strconv"
"time"

v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/dynamic-resource-allocation/resourceclaim"
"k8s.io/klog/v2"
drapb "k8s.io/kubelet/pkg/apis/dra/v1beta1"
dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)

// draManagerStateFileName is the file name where dra manager stores its state
const draManagerStateFileName = "dra_manager_state"

// defaultReconcilePeriod is the default reconciliation period to keep all claim info state in sync.
const defaultReconcilePeriod = 60 * time.Second

// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod

// GetNodeFunc is a function that returns the node object using the kubelet's node lister.
type GetNodeFunc func() (*v1.Node, error)

// ManagerImpl is the structure in charge of managing DRA drivers.
type ManagerImpl struct {
// cache contains cached claim info
cache *claimInfoCache

// reconcilePeriod is the duration between calls to reconcileLoop.
reconcilePeriod time.Duration

// activePods is a method for listing active pods on the node
// so all claim info state can be updated in the reconciliation loop.
activePods ActivePodsFunc

// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
// We use it to determine when we can treat pods as inactive and react appropriately.
sourcesReady config.SourcesReady

// KubeClient reference
kubeClient clientset.Interface

// getNode is a function that returns the node object using the kubelet's node lister.
getNode GetNodeFunc
}

// NewManagerImpl creates a new manager.
func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, nodeName types.NodeName) (*ManagerImpl, error) {
claimInfoCache, err := newClaimInfoCache(stateFileDirectory, draManagerStateFileName)
if err != nil {
return nil, fmt.Errorf("failed to create claimInfo cache: %w", err)
}

// TODO: for now the reconcile period is not configurable.
// We should consider making it configurable in the future.
reconcilePeriod := defaultReconcilePeriod

manager := &ManagerImpl{
cache: claimInfoCache,
kubeClient: kubeClient,
reconcilePeriod: reconcilePeriod,
activePods: nil,
sourcesReady: nil,
}

return manager, nil
}

func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
return cache.PluginHandler(dra.NewRegistrationHandler(m.kubeClient, m.getNode))
}

// Start starts the reconcile loop of the manager.
func (m *ManagerImpl) Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error {
m.activePods = activePods
m.getNode = getNode
m.sourcesReady = sourcesReady
go wait.UntilWithContext(ctx, func(ctx context.Context) { m.reconcileLoop(ctx) }, m.reconcilePeriod)
return nil
}

// reconcileLoop ensures that any stale state in the manager's claimInfoCache gets periodically reconciled.
func (m *ManagerImpl) reconcileLoop(ctx context.Context) {
logger := klog.FromContext(ctx)
// Only once all sources are ready do we attempt to reconcile.
// This ensures that the call to m.activePods() below will succeed with
// the actual active pods list.
if m.sourcesReady == nil || !m.sourcesReady.AllReady() {
return
}

// Get the full list of active pods.
activePods := sets.New[string]()
for _, p := range m.activePods() {
activePods.Insert(string(p.UID))
}

// Get the list of inactive pods still referenced by any claimInfos.
type podClaims struct {
uid types.UID
namespace string
claimNames []string
}
inactivePodClaims := make(map[string]*podClaims)
m.cache.RLock()
for _, claimInfo := range m.cache.claimInfo {
for podUID := range claimInfo.PodUIDs {
if activePods.Has(podUID) {
continue
}
if inactivePodClaims[podUID] == nil {
inactivePodClaims[podUID] = &podClaims{
uid: types.UID(podUID),
namespace: claimInfo.Namespace,
claimNames: []string{},
}
}
inactivePodClaims[podUID].claimNames = append(inactivePodClaims[podUID].claimNames, claimInfo.ClaimName)
}
}
m.cache.RUnlock()

// Loop through all inactive pods and call UnprepareResources on them.
for _, podClaims := range inactivePodClaims {
if err := m.unprepareResources(ctx, podClaims.uid, podClaims.namespace, podClaims.claimNames); err != nil {
logger.Info("Unpreparing pod resources in reconcile loop failed, will retry", "podUID", podClaims.uid, "err", err)
}
}
}

// PrepareResources attempts to prepare all of the required resources
// for the input pod, issue NodePrepareResources rpc requests
// for each new resource requirement, process their responses and update the cached
// containerResources on success.
func (m *ManagerImpl) PrepareResources(ctx context.Context, pod *v1.Pod) error {
startTime := time.Now()
err := m.prepareResources(ctx, pod)
metrics.DRAOperationsDuration.WithLabelValues("PrepareResources", strconv.FormatBool(err == nil)).Observe(time.Since(startTime).Seconds())
return err
}

func (m *ManagerImpl) prepareResources(ctx context.Context, pod *v1.Pod) error {
logger := klog.FromContext(ctx)
batches := make(map[string][]*drapb.Claim)
resourceClaims := make(map[types.UID]*resourceapi.ResourceClaim)
for i := range pod.Spec.ResourceClaims {
podClaim := &pod.Spec.ResourceClaims[i]
logger.V(3).Info("Processing resource", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
claimName, mustCheckOwner, err := resourceclaim.Name(pod, podClaim)
if err != nil {
return fmt.Errorf("prepare resource claim: %w", err)
}

if claimName == nil {
// Nothing to do.
logger.V(5).Info("No need to prepare resources, no claim generated", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
continue
}
// Query claim object from the API server
resourceClaim, err := m.kubeClient.ResourceV1beta1().ResourceClaims(pod.Namespace).Get(
ctx,
*claimName,
metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to fetch ResourceClaim %s referenced by pod %s: %w", *claimName, pod.Name, err)
}

if mustCheckOwner {
if err = resourceclaim.IsForPod(pod, resourceClaim); err != nil {
return err
}
}

// Check if pod is in the ReservedFor for the claim
if !resourceclaim.IsReservedForPod(pod, resourceClaim) {
return fmt.Errorf("pod %s(%s) is not allowed to use resource claim %s(%s)",
pod.Name, pod.UID, *claimName, resourceClaim.UID)
}

// Atomically perform some operations on the claimInfo cache.
err = m.cache.withLock(func() error {
// Get a reference to the claim info for this claim from the cache.
// If there isn't one yet, then add it to the cache.
claimInfo, exists := m.cache.get(resourceClaim.Name, resourceClaim.Namespace)
if !exists {
ci, err := newClaimInfoFromClaim(resourceClaim)
if err != nil {
return fmt.Errorf("claim %s: %w", klog.KObj(resourceClaim), err)
}
claimInfo = m.cache.add(ci)
logger.V(6).Info("Created new claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
} else {
logger.V(6).Info("Found existing claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
}

// Add a reference to the current pod in the claim info.
claimInfo.addPodReference(pod.UID)

// Checkpoint to ensure all claims we plan to prepare are tracked.
// If something goes wrong and the newly referenced pod gets
// deleted without a successful prepare call, we will catch
// that in the reconcile loop and take the appropriate action.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}

// If this claim is already prepared, there is no need to prepare it again.
if claimInfo.isPrepared() {
logger.V(5).Info("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
return nil
}

// This saved claim will be used to update ClaimInfo cache
// after NodePrepareResources GRPC succeeds
resourceClaims[claimInfo.ClaimUID] = resourceClaim

// Loop through all drivers and prepare for calling NodePrepareResources.
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
UID: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
}
for driverName := range claimInfo.DriverState {
batches[driverName] = append(batches[driverName], claim)
}

return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}

// Call NodePrepareResources for all claims in each batch.
// If there is any error, processing gets aborted.
// We could try to continue, but that would make the code more complex.
for driverName, claims := range batches {
// Call NodePrepareResources RPC for all resource handles.
client, err := dra.NewDRAPluginClient(driverName)
if err != nil {
return fmt.Errorf("failed to get gRPC client for driver %s: %w", driverName, err)
}
response, err := client.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{Claims: claims})
if err != nil {
// General error unrelated to any particular claim.
return fmt.Errorf("NodePrepareResources failed: %w", err)
}
for claimUID, result := range response.Claims {
reqClaim := lookupClaimRequest(claims, claimUID)
if reqClaim == nil {
return fmt.Errorf("NodePrepareResources returned result for unknown claim UID %s", claimUID)
}
if result.GetError() != "" {
return fmt.Errorf("NodePrepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}

claim := resourceClaims[types.UID(claimUID)]

// Add the prepared CDI devices to the claim info
err := m.cache.withLock(func() error {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
for _, device := range result.GetDevices() {
info.addDevice(driverName, state.Device{PoolName: device.PoolName, DeviceName: device.DeviceName, RequestNames: device.RequestNames, CDIDeviceIDs: device.CDIDeviceIDs})
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}

unfinished := len(claims) - len(response.Claims)
if unfinished != 0 {
return fmt.Errorf("NodePrepareResources left out %d claims", unfinished)
}
}

// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Mark all pod claims as prepared.
for _, claim := range resourceClaims {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
info.setPrepared()
}

// Checkpoint to ensure all prepared claims are tracked with their list
// of CDI devices attached.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}

return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}

return nil
}

func lookupClaimRequest(claims []*drapb.Claim, claimUID string) *drapb.Claim {
for _, claim := range claims {
if claim.UID == claimUID {
return claim
}
}
return nil
}

// GetResources gets a ContainerInfo object from the claimInfo cache.
// This information is used by the caller to update a container config.
func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error) {
cdiDevices := []kubecontainer.CDIDevice{}

for i := range pod.Spec.ResourceClaims {
podClaim := &pod.Spec.ResourceClaims[i]
claimName, _, err := resourceclaim.Name(pod, podClaim)
if err != nil {
return nil, fmt.Errorf("list resource claims: %w", err)
}
// The claim name might be nil if no underlying resource claim
// was generated for the referenced claim. There are valid use
// cases when this might happen, so we simply skip it.
if claimName == nil {
continue
}
for _, claim := range container.Resources.Claims {
if podClaim.Name != claim.Name {
continue
}

err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}

// As of Kubernetes 1.31, CDI device IDs are not passed via annotations anymore.
cdiDevices = append(cdiDevices, claimInfo.cdiDevicesAsList(claim.Request)...)

return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
}
}
return &ContainerInfo{CDIDevices: cdiDevices}, nil
}

// UnprepareResources calls a driver's NodeUnprepareResource API for each resource claim owned by a pod.
// This function is idempotent and may be called multiple times against the same pod.
// As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
// already been successfully unprepared.
func (m *ManagerImpl) UnprepareResources(ctx context.Context, pod *v1.Pod) error {
var err error
defer func(startTime time.Time) {
metrics.DRAOperationsDuration.WithLabelValues("UnprepareResources", strconv.FormatBool(err != nil)).Observe(time.Since(startTime).Seconds())
}(time.Now())
var claimNames []string
for i := range pod.Spec.ResourceClaims {
claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
if err != nil {
return fmt.Errorf("unprepare resource claim: %w", err)
}
// The claim name might be nil if no underlying resource claim
// was generated for the referenced claim. There are valid use
// cases when this might happen, so we simply skip it.
if claimName == nil {
continue
}
claimNames = append(claimNames, *claimName)
}
err = m.unprepareResources(ctx, pod.UID, pod.Namespace, claimNames)
return err
}

func (m *ManagerImpl) unprepareResources(ctx context.Context, podUID types.UID, namespace string, claimNames []string) error {
logger := klog.FromContext(ctx)
batches := make(map[string][]*drapb.Claim)
claimNamesMap := make(map[types.UID]string)
for _, claimName := range claimNames {
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Get the claim info from the cache
claimInfo, exists := m.cache.get(claimName, namespace)

// Skip calling NodeUnprepareResource if claim info is not cached
if !exists {
return nil
}

// Skip calling NodeUnprepareResource if other pods are still referencing it
if len(claimInfo.PodUIDs) > 1 {
// We delay checkpointing of this change until
// UnprepareResources returns successfully. It is OK to do
// this because we will only return successfully from this call
// if the checkpoint has succeeded. That means if the kubelet
// is ever restarted before this checkpoint succeeds, we will
// simply call into this (idempotent) function again.
claimInfo.deletePodReference(podUID)
return nil
}

// This claimInfo name will be used to update ClaimInfo cache
// after NodeUnprepareResources GRPC succeeds
claimNamesMap[claimInfo.ClaimUID] = claimInfo.ClaimName

// Loop through all drivers and prepare for calling NodeUnprepareResources.
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
UID: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
}
for driverName := range claimInfo.DriverState {
batches[driverName] = append(batches[driverName], claim)
}

return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}

// Call NodeUnprepareResources for all claims in each batch.
// If there is any error, processing gets aborted.
// We could try to continue, but that would make the code more complex.
for driverName, claims := range batches {
// Call NodeUnprepareResources RPC for all resource handles.
client, err := dra.NewDRAPluginClient(driverName)
if err != nil {
return fmt.Errorf("get gRPC client for DRA driver %s: %w", driverName, err)
}
response, err := client.NodeUnprepareResources(ctx, &drapb.NodeUnprepareResourcesRequest{Claims: claims})
if err != nil {
// General error unrelated to any particular claim.
return fmt.Errorf("NodeUnprepareResources failed: %w", err)
}

for claimUID, result := range response.Claims {
reqClaim := lookupClaimRequest(claims, claimUID)
if reqClaim == nil {
return fmt.Errorf("NodeUnprepareResources returned result for unknown claim UID %s", claimUID)
}
if result.GetError() != "" {
return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}
}

unfinished := len(claims) - len(response.Claims)
if unfinished != 0 {
return fmt.Errorf("NodeUnprepareResources left out %d claims", unfinished)
}
}

// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Delete all claimInfos from the cache that have just been unprepared.
for _, claimName := range claimNamesMap {
claimInfo, _ := m.cache.get(claimName, namespace)
m.cache.delete(claimName, namespace)
logger.V(6).Info("Deleted claim info cache entry", "claim", klog.KRef(namespace, claimName), "claimInfoEntry", claimInfo)
}

// Atomically sync the cache back to the checkpoint.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}

return nil
}

// PodMightNeedToUnprepareResources returns true if the pod might need to
// unprepare resources
func (m *ManagerImpl) PodMightNeedToUnprepareResources(uid types.UID) bool {
m.cache.Lock()
defer m.cache.Unlock()
return m.cache.hasPodReference(uid)
}

// GetContainerClaimInfos gets Container's ClaimInfo
func (m *ManagerImpl) GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error) {
claimInfos := make([]*ClaimInfo, 0, len(pod.Spec.ResourceClaims))

for i, podResourceClaim := range pod.Spec.ResourceClaims {
claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
if err != nil {
return nil, fmt.Errorf("determine resource claim information: %w", err)
}

for _, claim := range container.Resources.Claims {
if podResourceClaim.Name != claim.Name {
continue
}

err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}
claimInfos = append(claimInfos, claimInfo.DeepCopy())
return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
}
}
return claimInfos, nil
}
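// Illustrative sketch, not part of the vendored file: the DRA manager lifecycle
// as a hypothetical kubelet-side caller would exercise it. The manager, pod and
// container values are assumptions; the call order matches the methods above.
func examplePodLifecycle(ctx context.Context, m *ManagerImpl, pod *v1.Pod, container *v1.Container) error {
	// Before any container of the pod starts: prepare all referenced claims.
	if err := m.PrepareResources(ctx, pod); err != nil {
		return err
	}
	// While building the container config: collect the CDI devices to inject.
	info, err := m.GetResources(pod, container)
	if err != nil {
		return err
	}
	_ = info.CDIDevices
	// After the pod terminated: release the claims again (idempotent).
	return m.UnprepareResources(ctx, pod)
}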
181
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugin.go
generated
vendored
Normal file
181
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugin.go
generated
vendored
Normal file
@ -0,0 +1,181 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
"context"
"errors"
"fmt"
"net"
"sync"
"time"

"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/status"

"k8s.io/klog/v2"
drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)

// NewDRAPluginClient returns a wrapper around those gRPC methods of a DRA
// driver kubelet plugin which need to be called by kubelet. The wrapper
// handles gRPC connection management and logging. Connections are reused
// across different NewDRAPluginClient calls.
func NewDRAPluginClient(pluginName string) (*Plugin, error) {
if pluginName == "" {
return nil, fmt.Errorf("plugin name is empty")
}

existingPlugin := draPlugins.get(pluginName)
if existingPlugin == nil {
return nil, fmt.Errorf("plugin name %s not found in the list of registered DRA plugins", pluginName)
}

return existingPlugin, nil
}

type Plugin struct {
name string
backgroundCtx context.Context
cancel func(cause error)

mutex sync.Mutex
conn *grpc.ClientConn
endpoint string
chosenService string // e.g. drapbv1beta1.DRAPluginService
clientCallTimeout time.Duration
}

func (p *Plugin) getOrCreateGRPCConn() (*grpc.ClientConn, error) {
p.mutex.Lock()
defer p.mutex.Unlock()

if p.conn != nil {
return p.conn, nil
}

ctx := p.backgroundCtx
logger := klog.FromContext(ctx)

network := "unix"
logger.V(4).Info("Creating new gRPC connection", "protocol", network, "endpoint", p.endpoint)
// grpc.Dial is deprecated. grpc.NewClient should be used instead.
// For now this gets ignored because this function is meant to establish
// the connection, with the one second timeout below. Perhaps that
// approach should be reconsidered?
//nolint:staticcheck
conn, err := grpc.Dial(
p.endpoint,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithContextDialer(func(ctx context.Context, target string) (net.Conn, error) {
return (&net.Dialer{}).DialContext(ctx, network, target)
}),
grpc.WithChainUnaryInterceptor(newMetricsInterceptor(p.name)),
)
if err != nil {
return nil, err
}

ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()

if ok := conn.WaitForStateChange(ctx, connectivity.Connecting); !ok {
return nil, errors.New("timed out waiting for gRPC connection to be ready")
}

p.conn = conn
return p.conn, nil
}

func (p *Plugin) NodePrepareResources(
ctx context.Context,
req *drapbv1beta1.NodePrepareResourcesRequest,
opts ...grpc.CallOption,
) (*drapbv1beta1.NodePrepareResourcesResponse, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info("Calling NodePrepareResources rpc", "request", req)

conn, err := p.getOrCreateGRPCConn()
if err != nil {
return nil, err
}

ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
defer cancel()

var response *drapbv1beta1.NodePrepareResourcesResponse
switch p.chosenService {
case drapbv1beta1.DRAPluginService:
nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
response, err = nodeClient.NodePrepareResources(ctx, req)
case drapbv1alpha4.NodeService:
nodeClient := drapbv1alpha4.NewNodeClient(conn)
response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodePrepareResources(ctx, req)
default:
// Shouldn't happen, validateSupportedServices should only
// return services we support here.
return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
}
logger.V(4).Info("Done calling NodePrepareResources rpc", "response", response, "err", err)
return response, err
}

func (p *Plugin) NodeUnprepareResources(
ctx context.Context,
req *drapbv1beta1.NodeUnprepareResourcesRequest,
opts ...grpc.CallOption,
) (*drapbv1beta1.NodeUnprepareResourcesResponse, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info("Calling NodeUnprepareResource rpc", "request", req)

conn, err := p.getOrCreateGRPCConn()
if err != nil {
return nil, err
}

ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
defer cancel()

var response *drapbv1beta1.NodeUnprepareResourcesResponse
switch p.chosenService {
case drapbv1beta1.DRAPluginService:
nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
response, err = nodeClient.NodeUnprepareResources(ctx, req)
case drapbv1alpha4.NodeService:
nodeClient := drapbv1alpha4.NewNodeClient(conn)
response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodeUnprepareResources(ctx, req)
default:
// Shouldn't happen, validateSupportedServices should only
// return services we support here.
return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
}
logger.V(4).Info("Done calling NodeUnprepareResources rpc", "response", response, "err", err)
return response, err
}

func newMetricsInterceptor(pluginName string) grpc.UnaryClientInterceptor {
return func(ctx context.Context, method string, req, reply any, conn *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
start := time.Now()
err := invoker(ctx, method, req, reply, conn, opts...)
metrics.DRAGRPCOperationsDuration.WithLabelValues(pluginName, method, status.Code(err).String()).Observe(time.Since(start).Seconds())
return err
}
}
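// Illustrative sketch, not part of the vendored file: calling a registered DRA
// plugin through the wrapper above, as the dra manager does. Assumes
// same-package access; the driver name and claim value are hypothetical.
func exampleCallDriver(ctx context.Context, driverName string, claim *drapbv1beta1.Claim) error {
	client, err := NewDRAPluginClient(driverName)
	if err != nil {
		return err
	}
	resp, err := client.NodePrepareResources(ctx, &drapbv1beta1.NodePrepareResourcesRequest{
		Claims: []*drapbv1beta1.Claim{claim},
	})
	if err != nil {
		return err
	}
	// Per-claim failures come back in the response rather than as a gRPC error.
	for uid, result := range resp.Claims {
		if result.GetError() != "" {
			return fmt.Errorf("claim %s: %s", uid, result.GetError())
		}
	}
	return nil
}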
79
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugins_store.go
generated
vendored
Normal file
79
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugins_store.go
generated
vendored
Normal file
@ -0,0 +1,79 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
"errors"
"sync"
)

// pluginsStore holds a list of DRA Plugins.
type pluginsStore struct {
sync.RWMutex
store map[string]*Plugin
}

// draPlugins map keeps track of all registered DRA plugins on the node
// and their corresponding sockets.
var draPlugins = &pluginsStore{}

// get retrieves a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) get(pluginName string) *Plugin {
s.RLock()
defer s.RUnlock()

return s.store[pluginName]
}

// add saves a DRA Plugin to the list under the plugin's name.
// This method is protected by a mutex.
func (s *pluginsStore) add(p *Plugin) (replacedPlugin *Plugin, replaced bool) {
s.Lock()
defer s.Unlock()

if s.store == nil {
s.store = make(map[string]*Plugin)
}

replacedPlugin, exists := s.store[p.name]
s.store[p.name] = p

if replacedPlugin != nil && replacedPlugin.cancel != nil {
replacedPlugin.cancel(errors.New("plugin got replaced"))
}

return replacedPlugin, exists
}

// delete removes a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) delete(pluginName string) *Plugin {
s.Lock()
defer s.Unlock()

p, exists := s.store[pluginName]
if !exists {
return nil
}
if p.cancel != nil {
p.cancel(errors.New("plugin got removed"))
}
delete(s.store, pluginName)

return p
}
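// Illustrative sketch, not part of the vendored file: the store semantics in
// one place. Assumes same-package access; the plugin name and value are
// hypothetical.
func exampleStoreUsage() {
	p := &Plugin{name: "example.driver.k8s.io"}
	if old, replaced := draPlugins.add(p); replaced {
		// The previous registration with the same name was returned and its
		// background context has already been canceled by add().
		_ = old
	}
	if got := draPlugins.get("example.driver.k8s.io"); got != nil {
		// got is the currently registered plugin.
		_ = got
	}
	// delete cancels the plugin's background context and removes it.
	_ = draPlugins.delete("example.driver.k8s.io")
}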
249
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/registration.go
generated
vendored
Normal file
249
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/registration.go
generated
vendored
Normal file
@ -0,0 +1,249 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
"context"
"errors"
"fmt"
"slices"
"time"

v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)

// defaultClientCallTimeout is the default amount of time that a DRA driver has
// to respond to any of the gRPC calls. kubelet uses this value by passing nil
// to RegisterPlugin. Some tests use a different, usually shorter timeout to
// speed up testing.
//
// This is half of the kubelet retry period (according to
// https://github.com/kubernetes/kubernetes/commit/0449cef8fd5217d394c5cd331d852bd50983e6b3).
const defaultClientCallTimeout = 45 * time.Second

// RegistrationHandler is the handler which is fed to the pluginwatcher API.
type RegistrationHandler struct {
// backgroundCtx is used for all future activities of the handler.
// This is necessary because it implements APIs which don't
// provide a context.
backgroundCtx context.Context
kubeClient kubernetes.Interface
getNode func() (*v1.Node, error)
}

var _ cache.PluginHandler = &RegistrationHandler{}

// NewRegistrationHandler returns a new registration handler.
//
// Must only be called once per process because it manages global state.
// If a kubeClient is provided, then it synchronizes ResourceSlices
// with the resource information provided by plugins.
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
handler := &RegistrationHandler{
// The context and thus logger should come from the caller.
backgroundCtx: klog.NewContext(context.TODO(), klog.LoggerWithName(klog.TODO(), "DRA registration handler")),
kubeClient: kubeClient,
getNode: getNode,
}

// When kubelet starts up, no DRA driver has registered yet. None of
// the drivers are usable until they come back, which might not happen
// at all. Therefore it is better to not advertise any local resources
// because pods could get stuck on the node waiting for the driver
// to start up.
//
// This has to run in the background.
go handler.wipeResourceSlices("")

return handler
}

// wipeResourceSlices deletes ResourceSlices of the node, optionally just for a specific driver.
func (h *RegistrationHandler) wipeResourceSlices(driver string) {
if h.kubeClient == nil {
return
}
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)

backoff := wait.Backoff{
Duration: time.Second,
Factor: 2,
Jitter: 0.2,
Cap: 5 * time.Minute,
Steps: 100,
}

// Error logging is done inside the loop. Context cancellation doesn't get logged.
_ = wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) {
node, err := h.getNode()
if apierrors.IsNotFound(err) {
return false, nil
}
if err != nil {
logger.Error(err, "Unexpected error checking for node")
return false, nil
}
fieldSelector := fields.Set{resourceapi.ResourceSliceSelectorNodeName: node.Name}
if driver != "" {
fieldSelector[resourceapi.ResourceSliceSelectorDriver] = driver
}

err = h.kubeClient.ResourceV1beta1().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: fieldSelector.String()})
switch {
case err == nil:
logger.V(3).Info("Deleted ResourceSlices", "fieldSelector", fieldSelector)
return true, nil
case apierrors.IsUnauthorized(err):
// This can happen while kubelet is still figuring out
// its credentials.
logger.V(5).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
return false, nil
default:
// Log and retry for other errors.
logger.V(3).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
return false, nil
}
})
}

// RegisterPlugin is called when a plugin can be registered.
//
// DRA uses the version array in the registration API to enumerate all gRPC
// services that the plugin provides, using the "<gRPC package name>.<service
// name>" format (e.g. "v1beta1.DRAPlugin"). This allows kubelet to determine
// in advance which version to use and which optional services the plugin
// supports.
func (h *RegistrationHandler) RegisterPlugin(pluginName string, endpoint string, supportedServices []string, pluginClientTimeout *time.Duration) error {
// Prepare a context with its own logger for the plugin.
//
// The lifecycle of the plugin's background activities is tied to our
// root context, so canceling that will also cancel the plugin.
//
// The logger injects the plugin name as additional value
// into all log output related to the plugin.
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)
logger = klog.LoggerWithValues(logger, "pluginName", pluginName)
ctx = klog.NewContext(ctx, logger)

logger.V(3).Info("Register new DRA plugin", "endpoint", endpoint)

chosenService, err := h.validateSupportedServices(pluginName, supportedServices)
if err != nil {
return fmt.Errorf("version check of plugin %s failed: %w", pluginName, err)
}

var timeout time.Duration
if pluginClientTimeout == nil {
timeout = defaultClientCallTimeout
} else {
timeout = *pluginClientTimeout
}

ctx, cancel := context.WithCancelCause(ctx)

pluginInstance := &Plugin{
name: pluginName,
backgroundCtx: ctx,
cancel: cancel,
conn: nil,
endpoint: endpoint,
chosenService: chosenService,
clientCallTimeout: timeout,
}

// Store the endpoint of the newly registered DRA plugin in the map, keyed by plugin name,
// so that other DRA components can look up the plugin's socket by name.
if oldPlugin, replaced := draPlugins.add(pluginInstance); replaced {
logger.V(1).Info("DRA plugin already registered, the old plugin was replaced and will be forgotten by the kubelet till the next kubelet restart", "oldEndpoint", oldPlugin.endpoint)
}

return nil
}

// validateSupportedServices identifies the highest supported gRPC service for
// NodePrepareResources and NodeUnprepareResources and returns its name
// (e.g. [drapbv1beta1.DRAPluginService]). An error is returned if the plugin
// is unusable.
func (h *RegistrationHandler) validateSupportedServices(pluginName string, supportedServices []string) (string, error) {
if len(supportedServices) == 0 {
return "", errors.New("empty list of supported gRPC services (aka supported versions)")
}

// Pick most recent version if available.
chosenService := ""
for _, service := range []string{
// Sorted by most recent first, oldest last.
drapbv1beta1.DRAPluginService,
drapbv1alpha4.NodeService,
} {
if slices.Contains(supportedServices, service) {
chosenService = service
break
}
}

// Fall back to alpha if necessary because
// plugins at that time didn't advertise gRPC services.
if chosenService == "" {
chosenService = drapbv1alpha4.NodeService
}

return chosenService, nil
}

// DeRegisterPlugin is called when a plugin has removed its socket,
// signaling it is no longer available.
func (h *RegistrationHandler) DeRegisterPlugin(pluginName string) {
if p := draPlugins.delete(pluginName); p != nil {
logger := klog.FromContext(p.backgroundCtx)
logger.V(3).Info("Deregister DRA plugin", "endpoint", p.endpoint)

// Clean up the ResourceSlices for the deleted Plugin since it
// may have died without doing so itself and might never come
// back.
go h.wipeResourceSlices(pluginName)

return
}

logger := klog.FromContext(h.backgroundCtx)
logger.V(3).Info("Deregister DRA plugin not necessary, was already removed")
}

// ValidatePlugin is called by kubelet's plugin watcher upon detection
// of a new registration socket opened by DRA plugin.
func (h *RegistrationHandler) ValidatePlugin(pluginName string, endpoint string, supportedServices []string) error {
_, err := h.validateSupportedServices(pluginName, supportedServices)
if err != nil {
return fmt.Errorf("invalid versions of plugin %s: %w", pluginName, err)
}

return err
}
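// Illustrative sketch, not part of the vendored file: how a registration that
// advertises both supported gRPC services resolves. The handler value, driver
// name and socket path are assumptions; the service names come from the
// imports above.
func exampleRegister(h *RegistrationHandler) error {
	supported := []string{drapbv1beta1.DRAPluginService, drapbv1alpha4.NodeService}
	endpoint := "/var/lib/kubelet/plugins/example.sock"
	if err := h.ValidatePlugin("example.driver.k8s.io", endpoint, supported); err != nil {
		return err
	}
	// validateSupportedServices prefers the v1beta1 service, so the plugin
	// client created by RegisterPlugin talks v1beta1; passing nil selects the
	// default 45s client call timeout.
	return h.RegisterPlugin("example.driver.k8s.io", endpoint, supported, nil)
}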
107
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpoint.go
generated
vendored
Normal file
107
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"hash/crc32"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
)
|
||||
|
||||
const (
|
||||
CheckpointAPIGroup = "checkpoint.dra.kubelet.k8s.io"
|
||||
CheckpointKind = "DRACheckpoint"
|
||||
CheckpointAPIVersion = CheckpointAPIGroup + "/v1"
|
||||
)
|
||||
|
||||
// Checkpoint represents a structure to store DRA checkpoint data
|
||||
type Checkpoint struct {
|
||||
// Data is the JSON-serialized checkpoint data
|
||||
Data string
|
||||
// Checksum is a checksum of Data
|
||||
Checksum uint32
|
||||
}
|
||||
|
||||
type CheckpointData struct {
|
||||
metav1.TypeMeta
|
||||
ClaimInfoStateList ClaimInfoStateList
|
||||
}
|
||||
|
||||
// NewCheckpoint creates a new checkpoint from a list of claim info states
|
||||
func NewCheckpoint(data ClaimInfoStateList) (*Checkpoint, error) {
|
||||
cpData := &CheckpointData{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: CheckpointKind,
|
||||
APIVersion: CheckpointAPIVersion,
|
||||
},
|
||||
ClaimInfoStateList: data,
|
||||
}
|
||||
|
||||
cpDataBytes, err := json.Marshal(cpData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cp := &Checkpoint{
|
||||
Data: string(cpDataBytes),
|
||||
Checksum: crc32.ChecksumIEEE(cpDataBytes),
|
||||
}
|
||||
|
||||
return cp, nil
|
||||
}
|
||||
|
||||
// MarshalCheckpoint marshals checkpoint to JSON
|
||||
func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) {
|
||||
return json.Marshal(cp)
|
||||
}
|
||||
|
||||
// UnmarshalCheckpoint unmarshals checkpoint from JSON
|
||||
// and verifies its data checksum
|
||||
func (cp *Checkpoint) UnmarshalCheckpoint(blob []byte) error {
|
||||
if err := json.Unmarshal(blob, cp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// verify checksum
|
||||
if err := cp.VerifyChecksum(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that current checksum
|
||||
// of checkpointed Data is valid
|
||||
func (cp *Checkpoint) VerifyChecksum() error {
|
||||
expectedCS := crc32.ChecksumIEEE([]byte(cp.Data))
|
||||
if expectedCS != cp.Checksum {
|
||||
return &errors.CorruptCheckpointError{ActualCS: uint64(cp.Checksum), ExpectedCS: uint64(expectedCS)}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetClaimInfoStateList returns list of claim info states from checkpoint
|
||||
func (cp *Checkpoint) GetClaimInfoStateList() (ClaimInfoStateList, error) {
|
||||
var data CheckpointData
|
||||
if err := json.Unmarshal([]byte(cp.Data), &data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.ClaimInfoStateList, nil
|
||||
}
|
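As a side note, a minimal sketch (not part of the vendored file) of how the checkpoint round-trip above behaves; the claim name and namespace are hypothetical:

package main

import (
	"fmt"

	state "k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

func main() {
	// Build a checkpoint from a single, made-up claim info entry.
	claims := state.ClaimInfoStateList{
		{ClaimName: "example-claim", Namespace: "default"},
	}
	cp, err := state.NewCheckpoint(claims)
	if err != nil {
		panic(err)
	}

	// Round-trip through the serialized form; UnmarshalCheckpoint also
	// verifies the CRC32 checksum stored alongside Data.
	blob, _ := cp.MarshalCheckpoint()
	restored := &state.Checkpoint{}
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		panic(err)
	}

	list, _ := restored.GetClaimInfoStateList()
	fmt.Println(len(list)) // 1
}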
98
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpointer.go
generated
vendored
Normal file
98
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpointer.go
generated
vendored
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
)
|
||||
|
||||
type Checkpointer interface {
|
||||
GetOrCreate() (*Checkpoint, error)
|
||||
Store(*Checkpoint) error
|
||||
}
|
||||
|
||||
type checkpointer struct {
|
||||
sync.RWMutex
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
checkpointName string
|
||||
}
|
||||
|
||||
// NewCheckpointer creates a new checkpointer for keeping track of claim info with a checkpoint backend
|
||||
func NewCheckpointer(stateDir, checkpointName string) (Checkpointer, error) {
|
||||
if len(checkpointName) == 0 {
|
||||
return nil, fmt.Errorf("received empty string instead of checkpointName")
|
||||
}
|
||||
|
||||
checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize checkpoint manager: %w", err)
|
||||
}
|
||||
|
||||
checkpointer := &checkpointer{
|
||||
checkpointManager: checkpointManager,
|
||||
checkpointName: checkpointName,
|
||||
}
|
||||
|
||||
return checkpointer, nil
|
||||
}
|
||||
|
||||
// GetOrCreate reads the checkpoint from disk, or creates
|
||||
// and stores an empty one if the checkpoint doesn't exist yet
|
||||
func (sc *checkpointer) GetOrCreate() (*Checkpoint, error) {
|
||||
sc.Lock()
|
||||
defer sc.Unlock()
|
||||
|
||||
checkpoint, err := NewCheckpoint(nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create new checkpoint: %w", err)
|
||||
}
|
||||
|
||||
err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
|
||||
if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) {
|
||||
err = sc.store(checkpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to store checkpoint %v: %w", sc.checkpointName, err)
|
||||
}
|
||||
return checkpoint, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
|
||||
}
|
||||
|
||||
return checkpoint, nil
|
||||
}
|
||||
|
||||
// Store stores checkpoint to the file
|
||||
func (sc *checkpointer) Store(checkpoint *Checkpoint) error {
|
||||
sc.Lock()
|
||||
defer sc.Unlock()
|
||||
|
||||
return sc.store(checkpoint)
|
||||
}
|
||||
|
||||
// store saves state to a checkpoint; the caller is responsible for locking
|
||||
func (sc *checkpointer) store(checkpoint *Checkpoint) error {
|
||||
if err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint); err != nil {
|
||||
return fmt.Errorf("could not save checkpoint %s: %w", sc.checkpointName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
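For context, a small usage sketch (not part of the vendored file) of the Checkpointer wiring above; the state directory and checkpoint name are assumed values:

package main

import (
	"fmt"

	state "k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

func main() {
	// Both arguments matter: NewCheckpointer rejects an empty checkpoint name.
	cpr, err := state.NewCheckpointer("/tmp/dra-state", "dra_manager_state")
	if err != nil {
		panic(err)
	}

	// GetOrCreate returns the existing checkpoint or persists an empty one first.
	checkpoint, err := cpr.GetOrCreate()
	if err != nil {
		panic(err)
	}

	// Store writes the (possibly updated) checkpoint back under the same name.
	if err := cpr.Store(checkpoint); err != nil {
		panic(err)
	}
	fmt.Println("checkpoint persisted")
}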
59
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/state.go
generated
vendored
Normal file
59
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/state.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
)
|
||||
|
||||
type ClaimInfoStateList []ClaimInfoState
|
||||
|
||||
// +k8s:deepcopy-gen=true
|
||||
type ClaimInfoState struct {
|
||||
// ClaimUID is the UID of a resource claim
|
||||
ClaimUID types.UID
|
||||
|
||||
// ClaimName is the name of a resource claim
|
||||
ClaimName string
|
||||
|
||||
// Namespace is a claim namespace
|
||||
Namespace string
|
||||
|
||||
// PodUIDs is a set of pod UIDs that reference a resource
|
||||
PodUIDs sets.Set[string]
|
||||
|
||||
// DriverState contains information about all drivers which have allocation
|
||||
// results in the claim, even if they don't provide devices for their results.
|
||||
DriverState map[string]DriverState
|
||||
}
|
||||
|
||||
// DriverState is used to store per-device claim info state in a checkpoint
|
||||
// +k8s:deepcopy-gen=true
|
||||
type DriverState struct {
|
||||
Devices []Device
|
||||
}
|
||||
|
||||
// Device is how a DRA driver describes an allocated device in a claim
|
||||
// to kubelet. RequestNames and CDI device IDs are optional.
|
||||
// +k8s:deepcopy-gen=true
|
||||
type Device struct {
|
||||
PoolName string
|
||||
DeviceName string
|
||||
RequestNames []string
|
||||
CDIDeviceIDs []string
|
||||
}
|
105
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/zz_generated.deepcopy.go
generated
vendored
Normal file
105
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Code generated by deepcopy-gen. DO NOT EDIT.
|
||||
|
||||
package state
|
||||
|
||||
import (
|
||||
sets "k8s.io/apimachinery/pkg/util/sets"
|
||||
)
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ClaimInfoState) DeepCopyInto(out *ClaimInfoState) {
|
||||
*out = *in
|
||||
if in.PodUIDs != nil {
|
||||
in, out := &in.PodUIDs, &out.PodUIDs
|
||||
*out = make(sets.Set[string], len(*in))
|
||||
for key, val := range *in {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.DriverState != nil {
|
||||
in, out := &in.DriverState, &out.DriverState
|
||||
*out = make(map[string]DriverState, len(*in))
|
||||
for key, val := range *in {
|
||||
(*out)[key] = *val.DeepCopy()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfoState.
|
||||
func (in *ClaimInfoState) DeepCopy() *ClaimInfoState {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ClaimInfoState)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Device) DeepCopyInto(out *Device) {
|
||||
*out = *in
|
||||
if in.RequestNames != nil {
|
||||
in, out := &in.RequestNames, &out.RequestNames
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.CDIDeviceIDs != nil {
|
||||
in, out := &in.CDIDeviceIDs, &out.CDIDeviceIDs
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Device.
|
||||
func (in *Device) DeepCopy() *Device {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Device)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *DriverState) DeepCopyInto(out *DriverState) {
|
||||
*out = *in
|
||||
if in.Devices != nil {
|
||||
in, out := &in.Devices, &out.Devices
|
||||
*out = make([]Device, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverState.
|
||||
func (in *DriverState) DeepCopy() *DriverState {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(DriverState)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
61
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/types.go
generated
vendored
Normal file
61
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/types.go
generated
vendored
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package dra
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
)
|
||||
|
||||
// Manager manages all the DRA resource plugins running on a node.
|
||||
type Manager interface {
|
||||
// GetWatcherHandler returns the plugin handler for the DRA.
|
||||
GetWatcherHandler() cache.PluginHandler
|
||||
|
||||
// Start starts the reconcile loop of the manager.
|
||||
// This will ensure that all claims are unprepared even if pods get deleted unexpectedly.
|
||||
Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error
|
||||
|
||||
// PrepareResources prepares resources for a pod.
|
||||
// It communicates with the DRA resource plugin to prepare resources.
|
||||
PrepareResources(ctx context.Context, pod *v1.Pod) error
|
||||
|
||||
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
|
||||
UnprepareResources(ctx context.Context, pod *v1.Pod) error
|
||||
|
||||
// GetResources gets a ContainerInfo object from the claimInfo cache.
|
||||
// This information is used by the caller to update a container config.
|
||||
GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error)
|
||||
|
||||
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
|
||||
// might need to unprepare resources.
|
||||
PodMightNeedToUnprepareResources(UID types.UID) bool
|
||||
|
||||
// GetContainerClaimInfos gets Container ClaimInfo objects
|
||||
GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error)
|
||||
}
|
||||
|
||||
// ContainerInfo contains information required by the runtime to consume prepared resources.
|
||||
type ContainerInfo struct {
|
||||
// CDI Devices for the container
|
||||
CDIDevices []kubecontainer.CDIDevice
|
||||
}
|
39
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/zz_generated.deepcopy.go
generated
vendored
Normal file
39
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Code generated by deepcopy-gen. DO NOT EDIT.
|
||||
|
||||
package dra
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ClaimInfo) DeepCopyInto(out *ClaimInfo) {
|
||||
*out = *in
|
||||
in.ClaimInfoState.DeepCopyInto(&out.ClaimInfoState)
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfo.
|
||||
func (in *ClaimInfo) DeepCopy() *ClaimInfo {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ClaimInfo)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
270
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_container_manager.go
generated
vendored
Normal file
270
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_container_manager.go
generated
vendored
Normal file
@ -0,0 +1,270 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/resourceupdates"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
type FakeContainerManager struct {
|
||||
sync.Mutex
|
||||
CalledFunctions []string
|
||||
PodContainerManager *FakePodContainerManager
|
||||
shouldResetExtendedResourceCapacity bool
|
||||
}
|
||||
|
||||
var _ ContainerManager = &FakeContainerManager{}
|
||||
|
||||
func NewFakeContainerManager() *FakeContainerManager {
|
||||
return &FakeContainerManager{
|
||||
PodContainerManager: NewFakePodContainerManager(),
|
||||
}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ GetNodeFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "Start")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) SystemCgroupsLimit() v1.ResourceList {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "SystemCgroupsLimit")
|
||||
return v1.ResourceList{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetNodeConfig() NodeConfig {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetNodeConfig")
|
||||
return NodeConfig{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetMountedSubsystems() *CgroupSubsystems {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetMountedSubsystems")
|
||||
return &CgroupSubsystems{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetQOSContainersInfo() QOSContainersInfo {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "QOSContainersInfo")
|
||||
return QOSContainersInfo{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) UpdateQOSCgroups() error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "UpdateQOSCgroups")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) Status() Status {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "Status")
|
||||
return Status{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetNodeAllocatableReservation() v1.ResourceList {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetNodeAllocatableReservation")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetCapacity")
|
||||
if !localStorageCapacityIsolation {
|
||||
return v1.ResourceList{}
|
||||
}
|
||||
c := v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewQuantity(
|
||||
int64(0),
|
||||
resource.BinarySI),
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetPluginRegistrationHandlers() map[string]cache.PluginHandler {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetPluginRegistrationHandlers")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetHealthCheckers() []healthz.HealthChecker {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetPluginRegistrationServerChecker")
|
||||
return []healthz.HealthChecker{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetDevicePluginResourceCapacity")
|
||||
return nil, nil, []string{}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) NewPodContainerManager() PodContainerManager {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "PodContainerManager")
|
||||
return cm.PodContainerManager
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetResources")
|
||||
return &kubecontainer.RunContainerOptions{}, nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "UpdatePluginResources")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) InternalContainerLifecycle() InternalContainerLifecycle {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "InternalContainerLifecycle")
|
||||
return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()}
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetPodCgroupRoot() string {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupRoot")
|
||||
return ""
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetDevices(_, _ string) []*podresourcesapi.ContainerDevices {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetDevices")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetAllocatableDevices() []*podresourcesapi.ContainerDevices {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetAllocatableDevices")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) ShouldResetExtendedResourceCapacity() bool {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "ShouldResetExtendedResourceCapacity")
|
||||
return cm.shouldResetExtendedResourceCapacity
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetAllocateResourcesPodAdmitHandler")
|
||||
return topologymanager.NewFakeManager()
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) UpdateAllocatedDevices() {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "UpdateAllocatedDevices")
|
||||
return
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetCPUs(_, _ string) []int64 {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetCPUs")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetAllocatableCPUs() []int64 {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetMemory(_, _ string) []*podresourcesapi.ContainerMemory {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetMemory")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetAllocatableMemory() []*podresourcesapi.ContainerMemory {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) GetNodeAllocatableAbsolute() v1.ResourceList {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) UnprepareDynamicResources(context.Context, *v1.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *FakeContainerManager) PodMightNeedToUnprepareResources(UID types.UID) bool {
|
||||
return false
|
||||
}
|
||||
func (cm *FakeContainerManager) UpdateAllocatedResourcesStatus(pod *v1.Pod, status *v1.PodStatus) {
|
||||
}
|
||||
func (cm *FakeContainerManager) Updates() <-chan resourceupdates.Update {
|
||||
return nil
|
||||
}
|
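As an aside, a minimal test sketch (not part of the vendored file) showing how the fake above records the methods that were invoked; the test name is hypothetical:

package cm_test

import (
	"testing"

	"k8s.io/kubernetes/pkg/kubelet/cm"
)

func TestFakeContainerManagerRecordsCalls(t *testing.T) {
	fake := cm.NewFakeContainerManager()
	_ = fake.GetNodeConfig()
	_ = fake.UpdateQOSCgroups()

	// Every method appends its name to CalledFunctions, so tests can
	// assert on the exact call sequence.
	want := []string{"GetNodeConfig", "UpdateQOSCgroups"}
	for i, name := range want {
		if fake.CalledFunctions[i] != name {
			t.Fatalf("call %d = %q, want %q", i, fake.CalledFunctions[i], name)
		}
	}
}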
40
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_internal_container_lifecycle.go
generated
vendored
Normal file
40
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_internal_container_lifecycle.go
generated
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func NewFakeInternalContainerLifecycle() *fakeInternalContainerLifecycle {
|
||||
return &fakeInternalContainerLifecycle{}
|
||||
}
|
||||
|
||||
type fakeInternalContainerLifecycle struct{}
|
||||
|
||||
func (f *fakeInternalContainerLifecycle) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakeInternalContainerLifecycle) PreStartContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakeInternalContainerLifecycle) PostStopContainer(containerID string) error {
|
||||
return nil
|
||||
}
|
127
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_pod_container_manager.go
generated
vendored
Normal file
127
vendor/k8s.io/kubernetes/pkg/kubelet/cm/fake_pod_container_manager.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sync"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
type FakePodContainerManager struct {
|
||||
sync.Mutex
|
||||
CalledFunctions []string
|
||||
Cgroups map[types.UID]CgroupName
|
||||
}
|
||||
|
||||
var _ PodContainerManager = &FakePodContainerManager{}
|
||||
|
||||
func NewFakePodContainerManager() *FakePodContainerManager {
|
||||
return &FakePodContainerManager{
|
||||
Cgroups: make(map[types.UID]CgroupName),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) AddPodFromCgroups(pod *kubecontainer.Pod) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.Cgroups[pod.ID] = []string{pod.Name}
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) Exists(_ *v1.Pod) bool {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "Exists")
|
||||
return true
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) EnsureExists(_ *v1.Pod) error {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "EnsureExists")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "GetPodContainerName")
|
||||
return nil, ""
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) Destroy(name CgroupName) error {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "Destroy")
|
||||
for key, cgname := range m.Cgroups {
|
||||
if reflect.DeepEqual(cgname, name) {
|
||||
delete(m.Cgroups, key)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) ReduceCPULimits(_ CgroupName) error {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "ReduceCPULimits")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "GetAllPodsFromCgroups")
|
||||
// return a copy for the race detector
|
||||
grp := make(map[types.UID]CgroupName)
|
||||
for key, value := range m.Cgroups {
|
||||
grp[key] = value
|
||||
}
|
||||
return grp, nil
|
||||
}
|
||||
|
||||
func (m *FakePodContainerManager) IsPodCgroup(cgroupfs string) (bool, types.UID) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
m.CalledFunctions = append(m.CalledFunctions, "IsPodCgroup")
|
||||
return false, types.UID("")
|
||||
}
|
||||
|
||||
func (cm *FakePodContainerManager) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryUsage")
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (cm *FakePodContainerManager) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupConfig")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (cm *FakePodContainerManager) SetPodCgroupConfig(pod *v1.Pod, resourceConfig *ResourceConfig) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupConfig")
|
||||
return nil
|
||||
}
|
89
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers.go
generated
vendored
Normal file
89
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers.go
generated
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
)
|
||||
|
||||
// for typecheck across platforms
|
||||
var _ func(int64, int64) int64 = MilliCPUToQuota
|
||||
var _ func(int64) uint64 = MilliCPUToShares
|
||||
var _ func(*v1.Pod, bool, uint64, bool) *ResourceConfig = ResourceConfigForPod
|
||||
var _ func() (*CgroupSubsystems, error) = GetCgroupSubsystems
|
||||
var _ func(string) ([]int, error) = getCgroupProcs
|
||||
var _ func(types.UID) string = GetPodCgroupNameSuffix
|
||||
var _ func(string, bool, string) string = NodeAllocatableRoot
|
||||
var _ func(string) (string, error) = GetKubeletContainer
|
||||
|
||||
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
|
||||
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
|
||||
if len(thresholds) == 0 {
|
||||
return nil
|
||||
}
|
||||
ret := v1.ResourceList{}
|
||||
for _, threshold := range thresholds {
|
||||
if threshold.Operator != evictionapi.OpLessThan {
|
||||
continue
|
||||
}
|
||||
switch threshold.Signal {
|
||||
case evictionapi.SignalMemoryAvailable:
|
||||
memoryCapacity := capacity[v1.ResourceMemory]
|
||||
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
|
||||
ret[v1.ResourceMemory] = *value
|
||||
case evictionapi.SignalNodeFsAvailable:
|
||||
storageCapacity := capacity[v1.ResourceEphemeralStorage]
|
||||
value := evictionapi.GetThresholdQuantity(threshold.Value, &storageCapacity)
|
||||
ret[v1.ResourceEphemeralStorage] = *value
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func buildContainerMapAndRunningSetFromRuntime(ctx context.Context, runtimeService internalapi.RuntimeService) (containermap.ContainerMap, sets.Set[string]) {
|
||||
podSandboxMap := make(map[string]string)
|
||||
podSandboxList, _ := runtimeService.ListPodSandbox(ctx, nil)
|
||||
for _, p := range podSandboxList {
|
||||
podSandboxMap[p.Id] = p.Metadata.Uid
|
||||
}
|
||||
|
||||
runningSet := sets.New[string]()
|
||||
containerMap := containermap.NewContainerMap()
|
||||
containerList, _ := runtimeService.ListContainers(ctx, nil)
|
||||
for _, c := range containerList {
|
||||
if _, exists := podSandboxMap[c.PodSandboxId]; !exists {
|
||||
klog.InfoS("No PodSandBox found for the container", "podSandboxId", c.PodSandboxId, "containerName", c.Metadata.Name, "containerId", c.Id)
|
||||
continue
|
||||
}
|
||||
podUID := podSandboxMap[c.PodSandboxId]
|
||||
containerMap.Add(podUID, c.Metadata.Name, c.Id)
|
||||
if c.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
|
||||
klog.V(4).InfoS("Container reported running", "podSandboxId", c.PodSandboxId, "podUID", podUID, "containerName", c.Metadata.Name, "containerId", c.Id)
|
||||
runningSet.Insert(c.Id)
|
||||
}
|
||||
}
|
||||
return containerMap, runningSet
|
||||
}
|
343
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers_linux.go
generated
vendored
Normal file
343
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers_linux.go
generated
vendored
Normal file
@ -0,0 +1,343 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
|
||||
"k8s.io/component-helpers/resource"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/util"
|
||||
)
|
||||
|
||||
const (
|
||||
// These limits are defined in the kernel:
|
||||
// https://github.com/torvalds/linux/blob/0bddd227f3dc55975e2b8dfa7fc6f959b062a2c7/kernel/sched/sched.h#L427-L428
|
||||
MinShares = 2
|
||||
MaxShares = 262144
|
||||
|
||||
SharesPerCPU = 1024
|
||||
MilliCPUToCPU = 1000
|
||||
|
||||
// 100000 microseconds is equivalent to 100ms
|
||||
QuotaPeriod = 100000
|
||||
// 1000 microseconds is equivalent to 1ms
|
||||
// defined here:
|
||||
// https://github.com/torvalds/linux/blob/cac03ac368fabff0122853de2422d4e17a32de08/kernel/sched/core.c#L10546
|
||||
MinQuotaPeriod = 1000
|
||||
|
||||
// From the inverse of the conversion in MilliCPUToQuota:
|
||||
// MinQuotaPeriod * MilliCPUToCPU / QuotaPeriod
|
||||
MinMilliCPULimit = 10
|
||||
)
|
||||
|
||||
// MilliCPUToQuota converts milliCPU to a CFS quota value for the given period.
|
||||
// The period parameter and the returned quota are expressed in microseconds.
|
||||
func MilliCPUToQuota(milliCPU int64, period int64) (quota int64) {
|
||||
// CFS quota is measured in two values:
|
||||
// - cfs_period_us=100ms (the amount of time to measure usage across given by period)
|
||||
// - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
|
||||
// so in the above example, you are limited to 20% of a single CPU
|
||||
// for multi-cpu environments, you just scale equivalent amounts
|
||||
// see https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt for details
|
||||
|
||||
if milliCPU == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUCFSQuotaPeriod) {
|
||||
period = QuotaPeriod
|
||||
}
|
||||
|
||||
// we then convert your milliCPU to a value normalized over a period
|
||||
quota = (milliCPU * period) / MilliCPUToCPU
|
||||
|
||||
// quota needs to be a minimum of 1ms.
|
||||
if quota < MinQuotaPeriod {
|
||||
quota = MinQuotaPeriod
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MilliCPUToShares converts the milliCPU to CFS shares.
|
||||
func MilliCPUToShares(milliCPU int64) uint64 {
|
||||
if milliCPU == 0 {
|
||||
// Docker converts zero milliCPU to unset, which maps to kernel default
|
||||
// for unset: 1024. Return 2 here to really match kernel default for
|
||||
// zero milliCPU.
|
||||
return MinShares
|
||||
}
|
||||
// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
|
||||
shares := (milliCPU * SharesPerCPU) / MilliCPUToCPU
|
||||
if shares < MinShares {
|
||||
return MinShares
|
||||
}
|
||||
if shares > MaxShares {
|
||||
return MaxShares
|
||||
}
|
||||
return uint64(shares)
|
||||
}
|
||||
|
||||
// HugePageLimits converts the API representation to a map
|
||||
// from huge page size (in bytes) to huge page limit (in bytes).
|
||||
func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
|
||||
hugePageLimits := map[int64]int64{}
|
||||
for k, v := range resourceList {
|
||||
if v1helper.IsHugePageResourceName(k) {
|
||||
pageSize, _ := v1helper.HugePageSizeFromResourceName(k)
|
||||
if value, exists := hugePageLimits[pageSize.Value()]; exists {
|
||||
hugePageLimits[pageSize.Value()] = value + v.Value()
|
||||
} else {
|
||||
hugePageLimits[pageSize.Value()] = v.Value()
|
||||
}
|
||||
}
|
||||
}
|
||||
return hugePageLimits
|
||||
}
|
||||
|
||||
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
|
||||
func ResourceConfigForPod(allocatedPod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64, enforceMemoryQoS bool) *ResourceConfig {
|
||||
podLevelResourcesEnabled := utilfeature.DefaultFeatureGate.Enabled(kubefeatures.PodLevelResources)
|
||||
// sum requests and limits.
|
||||
reqs := resource.PodRequests(allocatedPod, resource.PodResourcesOptions{
|
||||
// SkipPodLevelResources is set to false when PodLevelResources feature is enabled.
|
||||
SkipPodLevelResources: !podLevelResourcesEnabled,
|
||||
UseStatusResources: false,
|
||||
})
|
||||
// track if limits were applied for each resource.
|
||||
memoryLimitsDeclared := true
|
||||
cpuLimitsDeclared := true
|
||||
|
||||
limits := resource.PodLimits(allocatedPod, resource.PodResourcesOptions{
|
||||
// SkipPodLevelResources is set to false when PodLevelResources feature is enabled.
|
||||
SkipPodLevelResources: !podLevelResourcesEnabled,
|
||||
ContainerFn: func(res v1.ResourceList, containerType resource.ContainerType) {
|
||||
if res.Cpu().IsZero() {
|
||||
cpuLimitsDeclared = false
|
||||
}
|
||||
if res.Memory().IsZero() {
|
||||
memoryLimitsDeclared = false
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
if podLevelResourcesEnabled && resource.IsPodLevelResourcesSet(allocatedPod) {
|
||||
if !allocatedPod.Spec.Resources.Limits.Cpu().IsZero() {
|
||||
cpuLimitsDeclared = true
|
||||
}
|
||||
|
||||
if !allocatedPod.Spec.Resources.Limits.Memory().IsZero() {
|
||||
memoryLimitsDeclared = true
|
||||
}
|
||||
}
|
||||
// map hugepage pagesize (bytes) to limits (bytes)
|
||||
hugePageLimits := HugePageLimits(reqs)
|
||||
|
||||
cpuRequests := int64(0)
|
||||
cpuLimits := int64(0)
|
||||
memoryLimits := int64(0)
|
||||
if request, found := reqs[v1.ResourceCPU]; found {
|
||||
cpuRequests = request.MilliValue()
|
||||
}
|
||||
if limit, found := limits[v1.ResourceCPU]; found {
|
||||
cpuLimits = limit.MilliValue()
|
||||
}
|
||||
if limit, found := limits[v1.ResourceMemory]; found {
|
||||
memoryLimits = limit.Value()
|
||||
}
|
||||
|
||||
// convert to CFS values
|
||||
cpuShares := MilliCPUToShares(cpuRequests)
|
||||
cpuQuota := MilliCPUToQuota(cpuLimits, int64(cpuPeriod))
|
||||
|
||||
// quota is not capped when cfs quota is disabled
|
||||
if !enforceCPULimits {
|
||||
cpuQuota = int64(-1)
|
||||
}
|
||||
|
||||
// determine the qos class
|
||||
qosClass := v1qos.GetPodQOS(allocatedPod)
|
||||
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
result.CPUShares = &cpuShares
|
||||
result.CPUQuota = &cpuQuota
|
||||
result.CPUPeriod = &cpuPeriod
|
||||
result.Memory = &memoryLimits
|
||||
} else if qosClass == v1.PodQOSBurstable {
|
||||
result.CPUShares = &cpuShares
|
||||
if cpuLimitsDeclared {
|
||||
result.CPUQuota = &cpuQuota
|
||||
result.CPUPeriod = &cpuPeriod
|
||||
}
|
||||
if memoryLimitsDeclared {
|
||||
result.Memory = &memoryLimits
|
||||
}
|
||||
} else {
|
||||
shares := uint64(MinShares)
|
||||
result.CPUShares = &shares
|
||||
}
|
||||
result.HugePageLimit = hugePageLimits
|
||||
|
||||
if enforceMemoryQoS {
|
||||
memoryMin := int64(0)
|
||||
if request, found := reqs[v1.ResourceMemory]; found {
|
||||
memoryMin = request.Value()
|
||||
}
|
||||
if memoryMin > 0 {
|
||||
result.Unified = map[string]string{
|
||||
Cgroup2MemoryMin: strconv.FormatInt(memoryMin, 10),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// getCgroupSubsystemsV1 returns information about the mounted cgroup v1 subsystems
|
||||
func getCgroupSubsystemsV1() (*CgroupSubsystems, error) {
|
||||
// get all cgroup mounts.
|
||||
allCgroups, err := libcontainercgroups.GetCgroupMounts(true)
|
||||
if err != nil {
|
||||
return &CgroupSubsystems{}, err
|
||||
}
|
||||
if len(allCgroups) == 0 {
|
||||
return &CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
|
||||
}
|
||||
mountPoints := make(map[string]string, len(allCgroups))
|
||||
for _, mount := range allCgroups {
|
||||
// Previously the kubelet used an arbitrary mount point per cgroup subsystem;
|
||||
// now it is more deterministic: the kubelet uses the mount point with the shortest path;
|
||||
// the expected behavior is tracked in the issue referenced below.
|
||||
// ref. issue: https://github.com/kubernetes/kubernetes/issues/95488
|
||||
|
||||
for _, subsystem := range mount.Subsystems {
|
||||
previous := mountPoints[subsystem]
|
||||
if previous == "" || len(mount.Mountpoint) < len(previous) {
|
||||
mountPoints[subsystem] = mount.Mountpoint
|
||||
}
|
||||
}
|
||||
}
|
||||
return &CgroupSubsystems{
|
||||
Mounts: allCgroups,
|
||||
MountPoints: mountPoints,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getCgroupSubsystemsV2 returns information about the enabled cgroup v2 subsystems
|
||||
func getCgroupSubsystemsV2() (*CgroupSubsystems, error) {
|
||||
controllers, err := libcontainercgroups.GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mounts := []libcontainercgroups.Mount{}
|
||||
mountPoints := make(map[string]string, len(controllers))
|
||||
for _, controller := range controllers {
|
||||
mountPoints[controller] = util.CgroupRoot
|
||||
m := libcontainercgroups.Mount{
|
||||
Mountpoint: util.CgroupRoot,
|
||||
Root: util.CgroupRoot,
|
||||
Subsystems: []string{controller},
|
||||
}
|
||||
mounts = append(mounts, m)
|
||||
}
|
||||
|
||||
return &CgroupSubsystems{
|
||||
Mounts: mounts,
|
||||
MountPoints: mountPoints,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetCgroupSubsystems returns information about the mounted cgroup subsystems
|
||||
func GetCgroupSubsystems() (*CgroupSubsystems, error) {
|
||||
if libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
return getCgroupSubsystemsV2()
|
||||
}
|
||||
|
||||
return getCgroupSubsystemsV1()
|
||||
}
|
||||
|
||||
// getCgroupProcs takes a cgroup directory name as an argument,
|
||||
// reads through the cgroup's procs file, and returns a list of TGIDs.
|
||||
// It returns an empty list if the procs file doesn't exist.
|
||||
func getCgroupProcs(dir string) ([]int, error) {
|
||||
procsFile := filepath.Join(dir, "cgroup.procs")
|
||||
f, err := os.Open(procsFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// The procs file does not exist, so no pids are attached to this directory
|
||||
return []int{}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
out := []int{}
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected line in %v; could not convert to pid: %v", procsFile, err)
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// GetPodCgroupNameSuffix returns the last element of the pod CgroupName identifier
|
||||
func GetPodCgroupNameSuffix(podUID types.UID) string {
|
||||
return podCgroupNamePrefix + string(podUID)
|
||||
}
|
||||
|
||||
// NodeAllocatableRoot returns the literal cgroup path for the node allocatable cgroup
|
||||
func NodeAllocatableRoot(cgroupRoot string, cgroupsPerQOS bool, cgroupDriver string) string {
|
||||
nodeAllocatableRoot := ParseCgroupfsToCgroupName(cgroupRoot)
|
||||
if cgroupsPerQOS {
|
||||
nodeAllocatableRoot = NewCgroupName(nodeAllocatableRoot, defaultNodeAllocatableCgroupName)
|
||||
}
|
||||
if cgroupDriver == "systemd" {
|
||||
return nodeAllocatableRoot.ToSystemd()
|
||||
}
|
||||
return nodeAllocatableRoot.ToCgroupfs()
|
||||
}
|
||||
|
||||
// GetKubeletContainer returns the cgroup the kubelet will use
|
||||
func GetKubeletContainer(kubeletCgroups string) (string, error) {
|
||||
if kubeletCgroups == "" {
|
||||
cont, err := getContainer(os.Getpid())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return cont, nil
|
||||
}
|
||||
return kubeletCgroups, nil
|
||||
}
|
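To make the conversion arithmetic above concrete, a standalone sketch (not part of the vendored file) that mirrors the share and quota formulas with the same constants:

package main

import "fmt"

// Constants mirror MinShares, MaxShares, SharesPerCPU, MilliCPUToCPU,
// QuotaPeriod and MinQuotaPeriod defined in helpers_linux.go.
const (
	minShares      = 2
	maxShares      = 262144
	sharesPerCPU   = 1024
	milliCPUToCPU  = 1000
	quotaPeriod    = 100000 // 100ms expressed in microseconds
	minQuotaPeriod = 1000   // 1ms expressed in microseconds
)

// milliCPUToShares mirrors the cgroup v1 cpu.shares conversion.
func milliCPUToShares(milliCPU int64) int64 {
	if milliCPU == 0 {
		return minShares
	}
	shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	if shares > maxShares {
		return maxShares
	}
	return shares
}

// milliCPUToQuota mirrors the CFS quota conversion for a fixed period.
func milliCPUToQuota(milliCPU, period int64) int64 {
	if milliCPU == 0 {
		return 0
	}
	quota := (milliCPU * period) / milliCPUToCPU
	if quota < minQuotaPeriod {
		quota = minQuotaPeriod
	}
	return quota
}

func main() {
	// A 500m CPU request becomes 512 shares; a 250m limit becomes a
	// 25000us quota per 100000us period, i.e. 25% of one CPU.
	fmt.Println(milliCPUToShares(500))             // 512
	fmt.Println(milliCPUToQuota(250, quotaPeriod)) // 25000
}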
76
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers_unsupported.go
generated
vendored
Normal file
76
vendor/k8s.io/kubernetes/pkg/kubelet/cm/helpers_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
)
|
||||
|
||||
const (
|
||||
MinShares = 0
|
||||
MaxShares = 0
|
||||
|
||||
SharesPerCPU = 0
|
||||
MilliCPUToCPU = 0
|
||||
|
||||
QuotaPeriod = 0
|
||||
MinQuotaPeriod = 0
|
||||
MinMilliCPULimit = 0
|
||||
)
|
||||
|
||||
// MilliCPUToQuota converts milliCPU and period to CFS quota values.
|
||||
func MilliCPUToQuota(milliCPU, period int64) int64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// MilliCPUToShares converts the milliCPU to CFS shares.
|
||||
func MilliCPUToShares(milliCPU int64) uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
|
||||
func ResourceConfigForPod(pod *v1.Pod, enforceCPULimit bool, cpuPeriod uint64, enforceMemoryQoS bool) *ResourceConfig {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetCgroupSubsystems returns information about the mounted cgroup subsystems
|
||||
func GetCgroupSubsystems() (*CgroupSubsystems, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func getCgroupProcs(dir string) ([]int, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// GetPodCgroupNameSuffix returns the last element of the pod CgroupName identifier
|
||||
func GetPodCgroupNameSuffix(podUID types.UID) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// NodeAllocatableRoot returns the literal cgroup path for the node allocatable cgroup
|
||||
func NodeAllocatableRoot(cgroupRoot string, cgroupsPerQOS bool, cgroupDriver string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetKubeletContainer returns the cgroup the kubelet will use
|
||||
func GetKubeletContainer(kubeletCgroups string) (string, error) {
|
||||
return "", nil
|
||||
}
|
56
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle.go
generated
vendored
Normal file
56
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle.go
generated
vendored
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
)
|
||||
|
||||
type InternalContainerLifecycle interface {
|
||||
PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error
|
||||
PreStartContainer(pod *v1.Pod, container *v1.Container, containerID string) error
|
||||
PostStopContainer(containerID string) error
|
||||
}
|
||||
|
||||
// Implements InternalContainerLifecycle interface.
|
||||
type internalContainerLifecycleImpl struct {
|
||||
cpuManager cpumanager.Manager
|
||||
memoryManager memorymanager.Manager
|
||||
topologyManager topologymanager.Manager
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
|
||||
if i.cpuManager != nil {
|
||||
i.cpuManager.AddContainer(pod, container, containerID)
|
||||
}
|
||||
|
||||
if i.memoryManager != nil {
|
||||
i.memoryManager.AddContainer(pod, container, containerID)
|
||||
}
|
||||
|
||||
i.topologyManager.AddContainer(pod, container, containerID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PostStopContainer(containerID string) error {
|
||||
return i.topologyManager.RemoveContainer(containerID)
|
||||
}
|
51
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle_linux.go
generated
vendored
Normal file
51
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle_linux.go
generated
vendored
Normal file
@ -0,0 +1,51 @@
//go:build linux
// +build linux

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"strconv"
	"strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
	if i.cpuManager != nil {
		allocatedCPUs := i.cpuManager.GetCPUAffinity(string(pod.UID), container.Name)
		if !allocatedCPUs.IsEmpty() {
			containerConfig.Linux.Resources.CpusetCpus = allocatedCPUs.String()
		}
	}

	if i.memoryManager != nil {
		numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
		if numaNodes.Len() > 0 {
			var affinity []string
			for _, numaNode := range sets.List(numaNodes) {
				affinity = append(affinity, strconv.Itoa(numaNode))
			}
			containerConfig.Linux.Resources.CpusetMems = strings.Join(affinity, ",")
		}
	}

	return nil
}
29
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,29 @@
//go:build !linux && !windows
// +build !linux,!windows

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"k8s.io/api/core/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
	return nil
}
141
vendor/k8s.io/kubernetes/pkg/kubelet/cm/internal_container_lifecycle_windows.go
generated
vendored
Normal file
@ -0,0 +1,141 @@
//go:build windows
// +build windows

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/klog/v2"
	kubefeatures "k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/winstats"
	"k8s.io/utils/cpuset"
)

func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
	if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WindowsCPUAndMemoryAffinity) {
		return nil
	}

	klog.V(4).Info("PreCreateContainer for Windows")

	// retrieve CPU and NUMA affinity from CPU Manager and Memory Manager (if enabled)
	var allocatedCPUs cpuset.CPUSet
	if i.cpuManager != nil {
		allocatedCPUs = i.cpuManager.GetCPUAffinity(string(pod.UID), container.Name)
	}

	var numaNodes sets.Set[int]
	if i.memoryManager != nil {
		numaNodes = i.memoryManager.GetMemoryNUMANodes(pod, container)
	}

	// Gather all CPUs associated with the selected NUMA nodes
	var allNumaNodeCPUs []winstats.GroupAffinity
	for _, numaNode := range sets.List(numaNodes) {
		affinity, err := winstats.GetCPUsforNUMANode(uint16(numaNode))
		if err != nil {
			return fmt.Errorf("failed to get CPUs for NUMA node %d: %v", numaNode, err)
		}
		allNumaNodeCPUs = append(allNumaNodeCPUs, *affinity)
	}

	var finalCPUSet = computeFinalCpuSet(allocatedCPUs, allNumaNodeCPUs)

	klog.V(4).InfoS("Setting CPU affinity", "affinity", finalCPUSet, "container", container.Name, "pod", pod.UID)

	// Set CPU group affinities in the container config
	if finalCPUSet != nil {
		var cpusToGroupAffinities []*runtimeapi.WindowsCpuGroupAffinity
		for group, mask := range groupMasks(finalCPUSet) {
			cpusToGroupAffinities = append(cpusToGroupAffinities, &runtimeapi.WindowsCpuGroupAffinity{
				CpuGroup: uint32(group),
				CpuMask:  uint64(mask),
			})
		}
		containerConfig.Windows.Resources.AffinityCpus = cpusToGroupAffinities
	}

	// return nil if no CPUs were selected
	return nil
}

// computeFinalCpuSet determines the final set of CPUs to use based on the CPU and memory managers
// and is extracted so that it can be tested
func computeFinalCpuSet(allocatedCPUs cpuset.CPUSet, allNumaNodeCPUs []winstats.GroupAffinity) sets.Set[int] {
	if !allocatedCPUs.IsEmpty() && len(allNumaNodeCPUs) > 0 {
		// Both CPU and memory managers are enabled

		numaNodeAffinityCPUSet := computeCPUSet(allNumaNodeCPUs)
		cpuManagerAffinityCPUSet := sets.New[int](allocatedCPUs.List()...)

		// Determine which set of CPUs to use using the following logic outlined in the KEP:
		// Case 1: CPU manager selects more CPUs than those available in the NUMA nodes selected by the memory manager
		// Case 2: CPU manager selects fewer CPUs, and they all fall within the CPUs available in the NUMA nodes selected by the memory manager
		// Case 3: CPU manager selects fewer CPUs, but some are outside of the CPUs available in the NUMA nodes selected by the memory manager

		if cpuManagerAffinityCPUSet.Len() > numaNodeAffinityCPUSet.Len() {
			// Case 1, use CPU manager selected CPUs
			return cpuManagerAffinityCPUSet
		} else if numaNodeAffinityCPUSet.IsSuperset(cpuManagerAffinityCPUSet) {
			// Case 2, use CPU manager selected CPUs
			return cpuManagerAffinityCPUSet
		} else {
			// Case 3, merge CPU manager and memory manager selected CPUs
			return cpuManagerAffinityCPUSet.Union(numaNodeAffinityCPUSet)
		}
	} else if !allocatedCPUs.IsEmpty() {
		// Only CPU manager is enabled, use CPU manager selected CPUs
		return sets.New[int](allocatedCPUs.List()...)
	} else if len(allNumaNodeCPUs) > 0 {
		// Only memory manager is enabled, use CPUs associated with selected NUMA nodes
		return computeCPUSet(allNumaNodeCPUs)
	}
	return nil
}

// computeCPUSet converts a list of GroupAffinity to a set of CPU IDs
func computeCPUSet(affinities []winstats.GroupAffinity) sets.Set[int] {
	cpuSet := sets.New[int]()
	for _, affinity := range affinities {
		for i := 0; i < 64; i++ {
			if (affinity.Mask>>i)&1 == 1 {
				cpuID := int(affinity.Group)*64 + i
				cpuSet.Insert(cpuID)
			}
		}
	}
	return cpuSet
}

// groupMasks converts a set of CPU IDs into group and mask representations
func groupMasks(cpuSet sets.Set[int]) map[int]uint64 {
	groupMasks := make(map[int]uint64)
	for cpu := range cpuSet {
		group := cpu / 64
		mask := uint64(1) << (cpu % 64)
		groupMasks[group] |= mask
	}
	return groupMasks
}
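Editor's note: the group/mask conversion above is plain bit arithmetic over Windows processor groups: CPU n lives in group n/64, at bit n%64 of that group's 64-bit mask. A minimal, self-contained sketch of the same arithmetic, standard library only; the function names here are illustrative and are not part of the vendored file:

package main

import "fmt"

// groupMasksOf mirrors the bit arithmetic used above: CPU n belongs to
// processor group n/64, bit n%64 of that group's 64-bit mask.
func groupMasksOf(cpus []int) map[int]uint64 {
	masks := map[int]uint64{}
	for _, cpu := range cpus {
		masks[cpu/64] |= uint64(1) << (cpu % 64)
	}
	return masks
}

// cpusOf inverts the mapping, expanding each (group, mask) pair back into CPU IDs.
func cpusOf(masks map[int]uint64) []int {
	var cpus []int
	for group, mask := range masks {
		for bit := 0; bit < 64; bit++ {
			if (mask>>bit)&1 == 1 {
				cpus = append(cpus, group*64+bit)
			}
		}
	}
	return cpus
}

func main() {
	// CPUs 2 and 3 sit in group 0; CPU 65 is bit 1 of group 1.
	masks := groupMasksOf([]int{2, 3, 65})
	fmt.Println(masks)         // map[0:12 1:2]
	fmt.Println(cpusOf(masks)) // [2 3 65] (order depends on map iteration)
}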
94
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/fake_memory_manager.go
generated
vendored
Normal file
@ -0,0 +1,94 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memorymanager

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/status"
)

type fakeManager struct {
	state state.State
}

func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
	klog.InfoS("Start()")
	return nil
}

func (m *fakeManager) Policy() Policy {
	klog.InfoS("Policy()")
	return NewPolicyNone()
}

func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
	klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
	return nil
}

func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
	klog.InfoS("Add container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID)
}

func (m *fakeManager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Set[int] {
	klog.InfoS("Get MemoryNUMANodes", "pod", klog.KObj(pod), "containerName", container.Name)
	return nil
}

func (m *fakeManager) RemoveContainer(containerID string) error {
	klog.InfoS("RemoveContainer", "containerID", containerID)
	return nil
}

func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
	klog.InfoS("Get Topology Hints", "pod", klog.KObj(pod), "containerName", container.Name)
	return map[string][]topologymanager.TopologyHint{}
}

func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
	klog.InfoS("Get Pod Topology Hints", "pod", klog.KObj(pod))
	return map[string][]topologymanager.TopologyHint{}
}

func (m *fakeManager) State() state.Reader {
	return m.state
}

// GetAllocatableMemory returns the amount of allocatable memory for each NUMA node
func (m *fakeManager) GetAllocatableMemory() []state.Block {
	klog.InfoS("Get Allocatable Memory")
	return []state.Block{}
}

// GetMemory returns the memory allocated by a container from NUMA nodes
func (m *fakeManager) GetMemory(podUID, containerName string) []state.Block {
	klog.InfoS("Get Memory", "podUID", podUID, "containerName", containerName)
	return []state.Block{}
}

// NewFakeManager creates empty/fake memory manager
func NewFakeManager() Manager {
	return &fakeManager{
		state: state.NewMemoryState(),
	}
}
467
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/memory_manager.go
generated
vendored
Normal file
@ -0,0 +1,467 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memorymanager

import (
	"context"
	"fmt"
	"runtime"
	"sync"

	cadvisorapi "github.com/google/cadvisor/info/v1"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/util/sets"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/klog/v2"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/status"
)

// memoryManagerStateFileName is the file name where memory manager stores its state
const memoryManagerStateFileName = "memory_manager_state"

// ActivePodsFunc is a function that returns a list of active pods
type ActivePodsFunc func() []*v1.Pod

type runtimeService interface {
	UpdateContainerResources(ctx context.Context, id string, resources *runtimeapi.ContainerResources) error
}

type sourcesReadyStub struct{}

func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool          { return true }

// Manager interface provides methods for Kubelet to manage pod memory.
type Manager interface {
	// Start is called during Kubelet initialization.
	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error

	// AddContainer adds the mapping between container ID and the pod UID and container name.
	// The mapping is used to remove the memory allocation during container removal.
	AddContainer(p *v1.Pod, c *v1.Container, containerID string)

	// Allocate is called to pre-allocate memory resources during Pod admission.
	// This must be called at some point prior to the AddContainer() call for a container, e.g. at pod admission time.
	Allocate(pod *v1.Pod, container *v1.Container) error

	// RemoveContainer is called after Kubelet decides to kill or delete a
	// container. After this call, any memory allocated to the container is freed.
	RemoveContainer(containerID string) error

	// State returns a read-only interface to the internal memory manager state.
	State() state.Reader

	// GetTopologyHints implements the topologymanager.HintProvider Interface
	// and is consulted to achieve NUMA aware resource alignment among this
	// and other resource controllers.
	GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint

	// GetPodTopologyHints implements the topologymanager.HintProvider Interface
	// and is consulted to achieve NUMA aware resource alignment among this
	// and other resource controllers.
	GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint

	// GetMemoryNUMANodes provides the NUMA nodes that are used to allocate the container memory
	GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Set[int]

	// GetAllocatableMemory returns the amount of allocatable memory for each NUMA node
	GetAllocatableMemory() []state.Block

	// GetMemory returns the memory allocated by a container from NUMA nodes
	GetMemory(podUID, containerName string) []state.Block
}

type manager struct {
	sync.Mutex
	policy Policy

	// state allows to restore information regarding memory allocation for guaranteed pods
	// in the case of a kubelet restart
	state state.State

	// containerRuntime is the container runtime service interface needed
	// to make UpdateContainerResources() calls against the containers.
	containerRuntime runtimeService

	// activePods is a method for listing active pods on the node
	// so all the containers can be updated during the call to removeStaleState.
	activePods ActivePodsFunc

	// podStatusProvider provides a method for obtaining pod statuses
	// and the containerID of their containers
	podStatusProvider status.PodStatusProvider

	// containerMap provides a mapping from (pod, container) -> containerID
	// for all containers in a pod
	containerMap containermap.ContainerMap

	// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
	// We use it to determine when we can purge inactive pods from checkpointed state.
	sourcesReady config.SourcesReady

	// stateFileDirectory holds the directory where the state file for checkpoints is held.
	stateFileDirectory string

	// allocatableMemory holds the allocatable memory for each NUMA node
	allocatableMemory []state.Block
}

var _ Manager = &manager{}

// NewManager returns a new instance of the memory manager
func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
	var policy Policy

	switch policyType(policyName) {

	case policyTypeNone:
		policy = NewPolicyNone()

	case policyTypeStatic:
		if runtime.GOOS == "windows" {
			return nil, fmt.Errorf("policy %q is not available on Windows", policyTypeStatic)
		}

		systemReserved, err := getSystemReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory)
		if err != nil {
			return nil, err
		}

		policy, err = NewPolicyStatic(machineInfo, systemReserved, affinity)
		if err != nil {
			return nil, err
		}

	case policyTypeBestEffort:
		if runtime.GOOS == "windows" {
			systemReserved, err := getSystemReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory)
			if err != nil {
				return nil, err
			}
			policy, err = NewPolicyBestEffort(machineInfo, systemReserved, affinity)
			if err != nil {
				return nil, err
			}
		} else {
			return nil, fmt.Errorf("policy %q is not available for platform %q", policyTypeBestEffort, runtime.GOOS)
		}

	default:
		return nil, fmt.Errorf("unknown policy: %q", policyName)
	}

	manager := &manager{
		policy:             policy,
		stateFileDirectory: stateFileDirectory,
	}
	manager.sourcesReady = &sourcesReadyStub{}
	return manager, nil
}

// Start starts the memory manager under the kubelet and calls policy start
func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
	klog.InfoS("Starting memorymanager", "policy", m.policy.Name())
	m.sourcesReady = sourcesReady
	m.activePods = activePods
	m.podStatusProvider = podStatusProvider
	m.containerRuntime = containerRuntime
	m.containerMap = initialContainers

	stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, memoryManagerStateFileName, m.policy.Name())
	if err != nil {
		klog.ErrorS(err, "Could not initialize checkpoint manager, please drain node and remove policy state file")
		return err
	}
	m.state = stateImpl

	err = m.policy.Start(m.state)
	if err != nil {
		klog.ErrorS(err, "Policy start error")
		return err
	}

	m.allocatableMemory = m.policy.GetAllocatableMemory(m.state)

	return nil
}

// AddContainer saves the value of requested memory for the guaranteed pod under the state and sets memory affinity according to the topology manager
func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
	m.Lock()
	defer m.Unlock()

	m.containerMap.Add(string(pod.UID), container.Name, containerID)

	// Since we know that each init container always runs to completion before
	// the next container starts, we can safely remove references to any previously
	// started init containers. This will free up the memory from these init containers
	// for use in other pods. If the current container happens to be an init container,
	// we skip deletion of it until the next container is added, and this is called again.
	for _, initContainer := range pod.Spec.InitContainers {
		if initContainer.Name == container.Name {
			break
		}

		// Since a restartable init container remains running for the full
		// duration of the pod's lifecycle, we should not remove it from the
		// memory manager state.
		if podutil.IsRestartableInitContainer(&initContainer) {
			continue
		}

		m.policyRemoveContainerByRef(string(pod.UID), initContainer.Name)
	}
}

// GetMemoryNUMANodes provides the NUMA nodes that are used to allocate the container memory
func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Set[int] {
	// Get NUMA node affinity of blocks assigned to the container during Allocate()
	numaNodes := sets.New[int]()
	for _, block := range m.state.GetMemoryBlocks(string(pod.UID), container.Name) {
		for _, nodeID := range block.NUMAAffinity {
			// avoid node duplication when hugepages and memory blocks are pinned to the same NUMA node
			numaNodes.Insert(nodeID)
		}
	}

	if numaNodes.Len() == 0 {
		klog.V(5).InfoS("No allocation is available", "pod", klog.KObj(pod), "containerName", container.Name)
		return nil
	}

	klog.InfoS("Memory affinity", "pod", klog.KObj(pod), "containerName", container.Name, "numaNodes", numaNodes)
	return numaNodes
}

// Allocate is called to pre-allocate memory resources during Pod admission.
func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {
	// Garbage collect any stranded resources before allocation
	m.removeStaleState()

	m.Lock()
	defer m.Unlock()

	// Call down into the policy to assign this container memory if required.
	if err := m.policy.Allocate(m.state, pod, container); err != nil {
		klog.ErrorS(err, "Allocate error")
		return err
	}
	return nil
}

// RemoveContainer removes the container from the state
func (m *manager) RemoveContainer(containerID string) error {
	m.Lock()
	defer m.Unlock()

	// if an error appears it means the container entry already does not exist under the container map
	podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
	if err != nil {
		klog.InfoS("Failed to get container from container map", "containerID", containerID, "err", err)
		return nil
	}

	m.policyRemoveContainerByRef(podUID, containerName)

	return nil
}

// State returns the state of the manager
func (m *manager) State() state.Reader {
	return m.state
}

// GetPodTopologyHints returns the topology hints for the topology manager
func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
	// Garbage collect any stranded resources before providing TopologyHints
	m.removeStaleState()
	// Delegate to active policy
	return m.policy.GetPodTopologyHints(m.state, pod)
}

// GetTopologyHints returns the topology hints for the topology manager
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
	// Garbage collect any stranded resources before providing TopologyHints
	m.removeStaleState()
	// Delegate to active policy
	return m.policy.GetTopologyHints(m.state, pod, container)
}

// TODO: move the method to the upper level, to re-use it under the CPU and memory managers
func (m *manager) removeStaleState() {
	// Only once all sources are ready do we attempt to remove any stale state.
	// This ensures that the call to `m.activePods()` below will succeed with
	// the actual active pods list.
	if !m.sourcesReady.AllReady() {
		return
	}

	// We grab the lock to ensure that no new containers will grab memory blocks while
	// executing the code below. Without this lock, it's possible that we end up
	// removing state that is newly added by an asynchronous call to
	// AddContainer() during the execution of this code.
	m.Lock()
	defer m.Unlock()

	// Get the list of active pods.
	activePods := m.activePods()

	// Build a list of (podUID, containerName) pairs for all containers in all active Pods.
	activeContainers := make(map[string]map[string]struct{})
	for _, pod := range activePods {
		activeContainers[string(pod.UID)] = make(map[string]struct{})
		for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
			activeContainers[string(pod.UID)][container.Name] = struct{}{}
		}
	}

	// Loop through the MemoryManager state. Remove any state for containers not
	// in the `activeContainers` list built above.
	assignments := m.state.GetMemoryAssignments()
	for podUID := range assignments {
		for containerName := range assignments[podUID] {
			if _, ok := activeContainers[podUID][containerName]; !ok {
				klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
				m.policyRemoveContainerByRef(podUID, containerName)
			}
		}
	}

	m.containerMap.Visit(func(podUID, containerName, containerID string) {
		if _, ok := activeContainers[podUID][containerName]; !ok {
			klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
			m.policyRemoveContainerByRef(podUID, containerName)
		}
	})
}

func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) {
	m.policy.RemoveContainer(m.state, podUID, containerName)
	m.containerMap.RemoveByContainerRef(podUID, containerName)
}

func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (map[v1.ResourceName]resource.Quantity, error) {
	totalMemoryType := map[v1.ResourceName]resource.Quantity{}

	numaNodes := map[int]bool{}
	for _, numaNode := range machineInfo.Topology {
		numaNodes[numaNode.Id] = true
	}

	for _, reservation := range reservedMemory {
		if !numaNodes[int(reservation.NumaNode)] {
			return nil, fmt.Errorf("the reserved memory configuration references a NUMA node %d that does not exist on this machine", reservation.NumaNode)
		}

		for resourceName, q := range reservation.Limits {
			if value, ok := totalMemoryType[resourceName]; ok {
				q.Add(value)
			}
			totalMemoryType[resourceName] = q
		}
	}

	return totalMemoryType, nil
}

func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation) error {
	totalMemoryType, err := getTotalMemoryTypeReserved(machineInfo, reservedMemory)
	if err != nil {
		return err
	}

	commonMemoryTypeSet := make(map[v1.ResourceName]bool)
	for resourceType := range totalMemoryType {
		commonMemoryTypeSet[resourceType] = true
	}

	for resourceType := range nodeAllocatableReservation {
		if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) {
			continue
		}
		commonMemoryTypeSet[resourceType] = true
	}

	for resourceType := range commonMemoryTypeSet {
		nodeAllocatableMemory := resource.NewQuantity(0, resource.DecimalSI)
		if memValue, set := nodeAllocatableReservation[resourceType]; set {
			nodeAllocatableMemory.Add(memValue)
		}

		reservedMemory := resource.NewQuantity(0, resource.DecimalSI)
		if memValue, set := totalMemoryType[resourceType]; set {
			reservedMemory.Add(memValue)
		}

		if !(*nodeAllocatableMemory).Equal(*reservedMemory) {
			return fmt.Errorf("the total amount %q of type %q is not equal to the value %q determined by Node Allocatable feature", reservedMemory.String(), resourceType, nodeAllocatableMemory.String())
		}
	}

	return nil
}

func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (systemReservedMemory, error) {
	reservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64)
	for _, node := range machineInfo.Topology {
		reservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64)
	}

	for _, reservation := range reservedMemory {
		for resourceName, q := range reservation.Limits {
			val, success := q.AsInt64()
			if !success {
				return nil, fmt.Errorf("could not convert a variable of type Quantity to int64")
			}
			reservedMemoryConverted[int(reservation.NumaNode)][resourceName] = uint64(val)
		}
	}

	return reservedMemoryConverted, nil
}

func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation) (systemReservedMemory, error) {
	if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory); err != nil {
		return nil, err
	}

	reservedMemoryConverted, err := convertReserved(machineInfo, reservedMemory)
	if err != nil {
		return nil, err
	}

	return reservedMemoryConverted, nil
}

// GetAllocatableMemory returns the amount of allocatable memory for each NUMA node
func (m *manager) GetAllocatableMemory() []state.Block {
	return m.allocatableMemory
}

// GetMemory returns the memory allocated by a container from NUMA nodes
func (m *manager) GetMemory(podUID, containerName string) []state.Block {
	return m.state.GetMemoryBlocks(podUID, containerName)
}
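Editor's note: the getSystemReservedMemory path above enforces one invariant worth calling out: the per-NUMA-node reservations from the kubelet's reservedMemory configuration must add up, per memory resource, to the reservation already declared through Node Allocatable. A rough, self-contained sketch of that check using plain byte counts (illustrative names, not the vendored code, which also covers hugepage resources and works with resource.Quantity):

package main

import "fmt"

// validate sums the per-NUMA-node reservations for each resource and compares
// the totals against the Node Allocatable reservation.
func validate(nodeAllocatable map[string]uint64, perNUMA map[int]map[string]uint64) error {
	totals := map[string]uint64{}
	for _, limits := range perNUMA {
		for res, v := range limits {
			totals[res] += v
		}
	}
	for res, want := range nodeAllocatable {
		if totals[res] != want {
			return fmt.Errorf("reserved %d of %q, but Node Allocatable expects %d", totals[res], res, want)
		}
	}
	return nil
}

func main() {
	// 2Gi reserved overall, split evenly across two NUMA nodes.
	nodeAllocatable := map[string]uint64{"memory": 2 << 30}
	perNUMA := map[int]map[string]uint64{
		0: {"memory": 1 << 30},
		1: {"memory": 1 << 30},
	}
	fmt.Println(validate(nodeAllocatable, perNUMA)) // <nil>: 1Gi + 1Gi matches the 2Gi reservation
}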
46
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/policy.go
generated
vendored
Normal file
@ -0,0 +1,46 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memorymanager

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
)

// policyType defines the policy type
type policyType string

// Policy implements the logic for pod container to memory assignment.
type Policy interface {
	Name() string
	Start(s state.State) error
	// Allocate call is idempotent
	Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
	// RemoveContainer call is idempotent
	RemoveContainer(s state.State, podUID string, containerName string)
	// GetTopologyHints implements the topologymanager.HintProvider Interface
	// and is consulted to achieve NUMA aware resource alignment among this
	// and other resource controllers.
	GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint
	// GetPodTopologyHints implements the topologymanager.HintProvider Interface
	// and is consulted to achieve NUMA aware resource alignment among this
	// and other resource controllers.
	GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint
	// GetAllocatableMemory returns the amount of allocatable memory for each NUMA node
	GetAllocatableMemory(s state.State) []state.Block
}
80
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/policy_best_effort.go
generated
vendored
Normal file
@ -0,0 +1,80 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memorymanager

import (
	cadvisorapi "github.com/google/cadvisor/info/v1"

	v1 "k8s.io/api/core/v1"

	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
)

// On Windows we want to use the same logic as the StaticPolicy to compute the memory topology hints,
// but unlike Linux-based systems, on Windows NUMA nodes cannot be directly assigned or guaranteed via Windows APIs
// (the Windows scheduler will use the NUMA node that is closest to the assigned CPU, thereby respecting the NUMA node assignment as a best effort).
// Because of this we don't want users to specify "StaticPolicy" for the memory manager policy via the kubelet configuration.
// Instead we use the "BestEffort" policy, which reuses the StaticPolicy logic and thereby reduces code duplication.
const policyTypeBestEffort policyType = "BestEffort"

// bestEffortPolicy is the implementation of the policy interface for the BestEffort policy
type bestEffortPolicy struct {
	static *staticPolicy
}

var _ Policy = &bestEffortPolicy{}

func NewPolicyBestEffort(machineInfo *cadvisorapi.MachineInfo, reserved systemReservedMemory, affinity topologymanager.Store) (Policy, error) {
	p, err := NewPolicyStatic(machineInfo, reserved, affinity)

	if err != nil {
		return nil, err
	}

	return &bestEffortPolicy{
		static: p.(*staticPolicy),
	}, nil
}

func (p *bestEffortPolicy) Name() string {
	return string(policyTypeBestEffort)
}

func (p *bestEffortPolicy) Start(s state.State) error {
	return p.static.Start(s)
}

func (p *bestEffortPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
	return p.static.Allocate(s, pod, container)
}

func (p *bestEffortPolicy) RemoveContainer(s state.State, podUID string, containerName string) {
	p.static.RemoveContainer(s, podUID, containerName)
}

func (p *bestEffortPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint {
	return p.static.GetPodTopologyHints(s, pod)
}

func (p *bestEffortPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
	return p.static.GetTopologyHints(s, pod, container)
}

func (p *bestEffortPolicy) GetAllocatableMemory(s state.State) []state.Block {
	return p.static.GetAllocatableMemory(s)
}
72
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/policy_none.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memorymanager

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
)

const policyTypeNone policyType = "None"

// none is the implementation of the policy interface for the None policy;
// using the None policy is the same as disabling memory management
type none struct{}

var _ Policy = &none{}

// NewPolicyNone returns a new none policy instance
func NewPolicyNone() Policy {
	return &none{}
}

func (p *none) Name() string {
	return string(policyTypeNone)
}

func (p *none) Start(s state.State) error {
	return nil
}

// Allocate call is idempotent
func (p *none) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
	return nil
}

// RemoveContainer call is idempotent
func (p *none) RemoveContainer(s state.State, podUID string, containerName string) {
}

// GetTopologyHints implements the topologymanager.HintProvider Interface
// and is consulted to achieve NUMA aware resource alignment among this
// and other resource controllers.
func (p *none) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
	return nil
}

// GetPodTopologyHints implements the topologymanager.HintProvider Interface
// and is consulted to achieve NUMA aware resource alignment among this
// and other resource controllers.
func (p *none) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint {
	return nil
}

// GetAllocatableMemory returns the amount of allocatable memory for each NUMA node
func (p *none) GetAllocatableMemory(s state.State) []state.Block {
	return []state.Block{}
}
1057
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/policy_static.go
generated
vendored
Normal file
File diff suppressed because it is too large
65
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state/checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,65 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	"encoding/json"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)

var _ checkpointmanager.Checkpoint = &MemoryManagerCheckpoint{}

// MemoryManagerCheckpoint struct is used to store memory/pod assignments in a checkpoint
type MemoryManagerCheckpoint struct {
	PolicyName   string                     `json:"policyName"`
	MachineState NUMANodeMap                `json:"machineState"`
	Entries      ContainerMemoryAssignments `json:"entries,omitempty"`
	Checksum     checksum.Checksum          `json:"checksum"`
}

// NewMemoryManagerCheckpoint returns an instance of Checkpoint
func NewMemoryManagerCheckpoint() *MemoryManagerCheckpoint {
	//nolint:staticcheck // unexported-type-in-api user-facing error message
	return &MemoryManagerCheckpoint{
		Entries:      ContainerMemoryAssignments{},
		MachineState: NUMANodeMap{},
	}
}

// MarshalCheckpoint returns marshalled checkpoint
func (mp *MemoryManagerCheckpoint) MarshalCheckpoint() ([]byte, error) {
	// make sure checksum wasn't set before so it doesn't affect output checksum
	mp.Checksum = 0
	mp.Checksum = checksum.New(mp)
	return json.Marshal(*mp)
}

// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint
func (mp *MemoryManagerCheckpoint) UnmarshalCheckpoint(blob []byte) error {
	return json.Unmarshal(blob, mp)
}

// VerifyChecksum verifies that current checksum of checkpoint is valid
func (mp *MemoryManagerCheckpoint) VerifyChecksum() error {
	ck := mp.Checksum
	mp.Checksum = 0
	err := ck.Verify(mp)
	mp.Checksum = ck
	return err
}
130
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state/state.go
generated
vendored
Normal file
@ -0,0 +1,130 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	v1 "k8s.io/api/core/v1"
)

// MemoryTable contains memory information
type MemoryTable struct {
	TotalMemSize   uint64 `json:"total"`
	SystemReserved uint64 `json:"systemReserved"`
	Allocatable    uint64 `json:"allocatable"`
	Reserved       uint64 `json:"reserved"`
	Free           uint64 `json:"free"`
}

// NUMANodeState contains NUMA node related information
type NUMANodeState struct {
	// NumberOfAssignments contains the number of memory assignments from this node
	// When the container requires memory and hugepages it will increase the number of assignments by two
	NumberOfAssignments int `json:"numberOfAssignments"`
	// MemoryMap contains NUMA node memory related information
	MemoryMap map[v1.ResourceName]*MemoryTable `json:"memoryMap"`
	// Cells contains the current NUMA node and all other nodes that are in a group with the current NUMA node
	// This parameter indicates if the current node is used for a multiple NUMA node memory allocation
	// For example, if some container has pinning 0,1,2, NUMA nodes 0,1,2 under the state will have
	// this parameter equal to [0, 1, 2]
	Cells []int `json:"cells"`
}

// NUMANodeMap contains memory information for each NUMA node.
type NUMANodeMap map[int]*NUMANodeState

// Clone returns a copy of NUMANodeMap
func (nm NUMANodeMap) Clone() NUMANodeMap {
	clone := make(NUMANodeMap)
	for node, s := range nm {
		if s == nil {
			clone[node] = nil
			continue
		}

		clone[node] = &NUMANodeState{}
		clone[node].NumberOfAssignments = s.NumberOfAssignments
		clone[node].Cells = append([]int{}, s.Cells...)

		if s.MemoryMap == nil {
			continue
		}

		clone[node].MemoryMap = map[v1.ResourceName]*MemoryTable{}
		for memoryType, memoryTable := range s.MemoryMap {
			clone[node].MemoryMap[memoryType] = &MemoryTable{
				Allocatable:    memoryTable.Allocatable,
				Free:           memoryTable.Free,
				Reserved:       memoryTable.Reserved,
				SystemReserved: memoryTable.SystemReserved,
				TotalMemSize:   memoryTable.TotalMemSize,
			}
		}
	}
	return clone
}

// Block is a data structure used to represent a certain amount of memory
type Block struct {
	// NUMAAffinity contains the list of NUMA node IDs that represent the NUMA affinity of the block
	NUMAAffinity []int           `json:"numaAffinity"`
	Type         v1.ResourceName `json:"type"`
	Size         uint64          `json:"size"`
}

// ContainerMemoryAssignments stores memory assignments of containers
type ContainerMemoryAssignments map[string]map[string][]Block

// Clone returns a copy of ContainerMemoryAssignments
func (as ContainerMemoryAssignments) Clone() ContainerMemoryAssignments {
	clone := make(ContainerMemoryAssignments)
	for pod := range as {
		clone[pod] = make(map[string][]Block)
		for container, blocks := range as[pod] {
			clone[pod][container] = append([]Block{}, blocks...)
		}
	}
	return clone
}

// Reader interface used to read the current memory/pod assignment state
type Reader interface {
	// GetMachineState returns the Memory Map stored in the State
	GetMachineState() NUMANodeMap
	// GetMemoryBlocks returns memory assignments of a container
	GetMemoryBlocks(podUID string, containerName string) []Block
	// GetMemoryAssignments returns ContainerMemoryAssignments
	GetMemoryAssignments() ContainerMemoryAssignments
}

type writer interface {
	// SetMachineState stores NUMANodeMap in the State
	SetMachineState(memoryMap NUMANodeMap)
	// SetMemoryBlocks stores memory assignments of a container
	SetMemoryBlocks(podUID string, containerName string, blocks []Block)
	// SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
	SetMemoryAssignments(assignments ContainerMemoryAssignments)
	// Delete deletes the corresponding Blocks from ContainerMemoryAssignments
	Delete(podUID string, containerName string)
	// ClearState clears machineState and ContainerMemoryAssignments
	ClearState()
}

// State interface provides methods for tracking and setting memory/pod assignment
type State interface {
	Reader
	writer
}
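Editor's note: to make the Block and ContainerMemoryAssignments shapes above concrete, here is a small, self-contained sketch that builds one assignment entry and prints it as JSON. The struct below only mirrors the vendored types for illustration (field names follow the JSON tags above); it is not the real state package, and the pod UID and container name are made up:

package main

import (
	"encoding/json"
	"fmt"
)

// block mirrors the Block shape: NUMA affinity, resource type, and size in bytes.
type block struct {
	NUMAAffinity []int  `json:"numaAffinity"`
	Type         string `json:"type"`
	Size         uint64 `json:"size"`
}

// containerMemoryAssignments mirrors the pod UID -> container name -> blocks mapping.
type containerMemoryAssignments map[string]map[string][]block

func main() {
	assignments := containerMemoryAssignments{
		"pod-uid-1234": {
			"app": {
				{NUMAAffinity: []int{0, 1}, Type: "memory", Size: 1 << 30},
				{NUMAAffinity: []int{0, 1}, Type: "hugepages-2Mi", Size: 2 << 20},
			},
		},
	}
	out, _ := json.MarshalIndent(assignments, "", "  ")
	fmt.Println(string(out))
}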
184
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go
generated
vendored
Normal file
@ -0,0 +1,184 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	"fmt"
	"path/filepath"
	"sync"

	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)

var _ State = &stateCheckpoint{}

type stateCheckpoint struct {
	sync.RWMutex
	cache             State
	policyName        string
	checkpointManager checkpointmanager.CheckpointManager
	checkpointName    string
}

// NewCheckpointState creates new State for keeping track of memory/pod assignment with checkpoint backend
func NewCheckpointState(stateDir, checkpointName, policyName string) (State, error) {
	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
	}
	stateCheckpoint := &stateCheckpoint{
		cache:             NewMemoryState(),
		policyName:        policyName,
		checkpointManager: checkpointManager,
		checkpointName:    checkpointName,
	}

	if err := stateCheckpoint.restoreState(); err != nil {
		//nolint:staticcheck // ST1005 user-facing error message
		return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete the memory manager checkpoint file %q before restarting Kubelet",
			err, filepath.Join(stateDir, checkpointName))
	}

	return stateCheckpoint, nil
}

// restoreState restores state from a checkpoint and creates it if it doesn't exist
func (sc *stateCheckpoint) restoreState() error {
	sc.Lock()
	defer sc.Unlock()
	var err error

	checkpoint := NewMemoryManagerCheckpoint()
	if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil {
		if err == errors.ErrCheckpointNotFound {
			return sc.storeState()
		}
		return err
	}

	if sc.policyName != checkpoint.PolicyName {
		return fmt.Errorf("[memorymanager] configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpoint.PolicyName)
	}

	sc.cache.SetMachineState(checkpoint.MachineState)
	sc.cache.SetMemoryAssignments(checkpoint.Entries)

	klog.V(2).InfoS("State checkpoint: restored state from checkpoint")

	return nil
}

// storeState saves state to a checkpoint, caller is responsible for locking
func (sc *stateCheckpoint) storeState() error {
	checkpoint := NewMemoryManagerCheckpoint()
	checkpoint.PolicyName = sc.policyName
	checkpoint.MachineState = sc.cache.GetMachineState()
	checkpoint.Entries = sc.cache.GetMemoryAssignments()

	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
	if err != nil {
		klog.ErrorS(err, "Could not save checkpoint")
		return err
	}
	return nil
}

// GetMachineState returns the Memory Map stored in the State
func (sc *stateCheckpoint) GetMachineState() NUMANodeMap {
	sc.RLock()
	defer sc.RUnlock()

	return sc.cache.GetMachineState()
}

// GetMemoryBlocks returns memory assignments of a container
func (sc *stateCheckpoint) GetMemoryBlocks(podUID string, containerName string) []Block {
	sc.RLock()
	defer sc.RUnlock()

	return sc.cache.GetMemoryBlocks(podUID, containerName)
}

// GetMemoryAssignments returns ContainerMemoryAssignments
func (sc *stateCheckpoint) GetMemoryAssignments() ContainerMemoryAssignments {
	sc.RLock()
	defer sc.RUnlock()

	return sc.cache.GetMemoryAssignments()
}

// SetMachineState stores NUMANodeMap in the State
func (sc *stateCheckpoint) SetMachineState(memoryMap NUMANodeMap) {
	sc.Lock()
	defer sc.Unlock()

	sc.cache.SetMachineState(memoryMap)
	err := sc.storeState()
	if err != nil {
		klog.InfoS("Store state to checkpoint error", "err", err)
	}
}

// SetMemoryBlocks stores memory assignments of a container
func (sc *stateCheckpoint) SetMemoryBlocks(podUID string, containerName string, blocks []Block) {
	sc.Lock()
	defer sc.Unlock()

	sc.cache.SetMemoryBlocks(podUID, containerName, blocks)
	err := sc.storeState()
	if err != nil {
		klog.InfoS("Store state to checkpoint error", "err", err)
	}
}

// SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
func (sc *stateCheckpoint) SetMemoryAssignments(assignments ContainerMemoryAssignments) {
	sc.Lock()
	defer sc.Unlock()

	sc.cache.SetMemoryAssignments(assignments)
	err := sc.storeState()
	if err != nil {
		klog.InfoS("Store state to checkpoint error", "err", err)
	}
}

// Delete deletes the corresponding Blocks from ContainerMemoryAssignments
func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
	sc.Lock()
	defer sc.Unlock()

	sc.cache.Delete(podUID, containerName)
	err := sc.storeState()
	if err != nil {
		klog.InfoS("Store state to checkpoint error", "err", err)
	}
}

// ClearState clears machineState and ContainerMemoryAssignments
func (sc *stateCheckpoint) ClearState() {
	sc.Lock()
	defer sc.Unlock()

	sc.cache.ClearState()
	err := sc.storeState()
	if err != nil {
		klog.InfoS("Store state to checkpoint error", "err", err)
	}
}
123
vendor/k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state/state_mem.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
    "sync"

    "k8s.io/klog/v2"
)

type stateMemory struct {
    sync.RWMutex
    assignments  ContainerMemoryAssignments
    machineState NUMANodeMap
}

var _ State = &stateMemory{}

// NewMemoryState creates new State for keeping track of memory/pod assignment
func NewMemoryState() State {
    klog.InfoS("Initializing new in-memory state store")
    return &stateMemory{
        assignments:  ContainerMemoryAssignments{},
        machineState: NUMANodeMap{},
    }
}

// GetMachineState returns the machine state stored in the State
func (s *stateMemory) GetMachineState() NUMANodeMap {
    s.RLock()
    defer s.RUnlock()

    return s.machineState.Clone()
}

// GetMemoryBlocks returns memory assignments of a container
func (s *stateMemory) GetMemoryBlocks(podUID string, containerName string) []Block {
    s.RLock()
    defer s.RUnlock()

    if res, ok := s.assignments[podUID][containerName]; ok {
        return append([]Block{}, res...)
    }
    return nil
}

// GetMemoryAssignments returns ContainerMemoryAssignments
func (s *stateMemory) GetMemoryAssignments() ContainerMemoryAssignments {
    s.RLock()
    defer s.RUnlock()

    return s.assignments.Clone()
}

// SetMachineState stores NUMANodeMap in State
func (s *stateMemory) SetMachineState(nodeMap NUMANodeMap) {
    s.Lock()
    defer s.Unlock()

    s.machineState = nodeMap.Clone()
    klog.InfoS("Updated machine memory state")
}

// SetMemoryBlocks stores memory assignments of a container
func (s *stateMemory) SetMemoryBlocks(podUID string, containerName string, blocks []Block) {
    s.Lock()
    defer s.Unlock()

    if _, ok := s.assignments[podUID]; !ok {
        s.assignments[podUID] = map[string][]Block{}
    }

    s.assignments[podUID][containerName] = append([]Block{}, blocks...)
    klog.InfoS("Updated memory state", "podUID", podUID, "containerName", containerName)
}

// SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
func (s *stateMemory) SetMemoryAssignments(assignments ContainerMemoryAssignments) {
    s.Lock()
    defer s.Unlock()

    s.assignments = assignments.Clone()
}

// Delete deletes corresponding Blocks from ContainerMemoryAssignments
func (s *stateMemory) Delete(podUID string, containerName string) {
    s.Lock()
    defer s.Unlock()

    if _, ok := s.assignments[podUID]; !ok {
        return
    }

    delete(s.assignments[podUID], containerName)
    if len(s.assignments[podUID]) == 0 {
        delete(s.assignments, podUID)
    }
    klog.V(2).InfoS("Deleted memory assignment", "podUID", podUID, "containerName", containerName)
}

// ClearState clears machineState and ContainerMemoryAssignments
func (s *stateMemory) ClearState() {
    s.Lock()
    defer s.Unlock()

    s.machineState = NUMANodeMap{}
    s.assignments = make(ContainerMemoryAssignments)
    klog.V(2).InfoS("Cleared state")
}
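Note how stateMemory returns copies (append([]Block{}, res...) and Clone()) rather than handing out its internal slices and maps. The short, self-contained sketch below shows why that defensive copy matters; the block and store types are hypothetical illustration types, not part of the memory manager.

// defensive_copy_sketch.go - why returning internal slices directly is unsafe.
package main

import "fmt"

type block struct{ size uint64 }

type store struct{ blocks []block }

// leaky returns the internal slice; caller mutations are visible inside the store.
func (s *store) leaky() []block { return s.blocks }

// safe returns a defensive copy, matching the append([]Block{}, res...) idiom above.
func (s *store) safe() []block { return append([]block{}, s.blocks...) }

func main() {
    s := &store{blocks: []block{{size: 1 << 30}}}

    got := s.leaky()
    got[0].size = 0                                        // silently corrupts the store's state
    fmt.Println("after leaky mutation:", s.blocks[0].size) // 0

    s.blocks = []block{{size: 1 << 30}}
    got = s.safe()
    got[0].size = 0                                       // only the copy changes
    fmt.Println("after safe mutation:", s.blocks[0].size) // 1073741824
}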
328
vendor/k8s.io/kubernetes/pkg/kubelet/cm/node_container_manager_linux.go
generated
vendored
Normal file
@ -0,0 +1,328 @@
//go:build linux
// +build linux

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
    "errors"
    "fmt"
    "strconv"
    "strings"
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    "k8s.io/apimachinery/pkg/types"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/klog/v2"
    kubefeatures "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/pkg/kubelet/events"
    "k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
    kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

const (
    defaultNodeAllocatableCgroupName = "kubepods"
)

// createNodeAllocatableCgroups creates the Node Allocatable cgroup when the CgroupsPerQOS flag is set to true.
func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
    nodeAllocatable := cm.internalCapacity
    // Use Node Allocatable limits instead of capacity if the user requested enforcing node allocatable.
    nc := cm.NodeConfig.NodeAllocatableConfig
    if cm.CgroupsPerQOS && nc.EnforceNodeAllocatable.Has(kubetypes.NodeAllocatableEnforcementKey) {
        nodeAllocatable = cm.getNodeAllocatableInternalAbsolute()
    }

    cgroupConfig := &CgroupConfig{
        Name: cm.cgroupRoot,
        // The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
        ResourceParameters: cm.getCgroupConfig(nodeAllocatable, false),
    }
    if cm.cgroupManager.Exists(cgroupConfig.Name) {
        return nil
    }
    if err := cm.cgroupManager.Create(cgroupConfig); err != nil {
        klog.ErrorS(err, "Failed to create cgroup", "cgroupName", cm.cgroupRoot)
        return err
    }
    return nil
}

// enforceNodeAllocatableCgroups enforces Node Allocatable cgroup settings.
func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
    nc := cm.NodeConfig.NodeAllocatableConfig

    // We need to update limits on node allocatable cgroup no matter what because
    // default cpu shares on cgroups are low and can cause cpu starvation.
    nodeAllocatable := cm.internalCapacity
    // Use Node Allocatable limits instead of capacity if the user requested enforcing node allocatable.
    if cm.CgroupsPerQOS && nc.EnforceNodeAllocatable.Has(kubetypes.NodeAllocatableEnforcementKey) {
        nodeAllocatable = cm.getNodeAllocatableInternalAbsolute()
    }

    klog.V(4).InfoS("Attempting to enforce Node Allocatable", "config", nc)

    cgroupConfig := &CgroupConfig{
        Name:               cm.cgroupRoot,
        ResourceParameters: cm.getCgroupConfig(nodeAllocatable, false),
    }

    // Using ObjectReference for events as the node may not be cached; refer to #42701 for detail.
    nodeRef := nodeRefFromNode(cm.nodeInfo.Name)

    // If Node Allocatable is enforced on a node that has not been drained or is updated on an existing node to a lower value,
    // existing memory usage across pods might be higher than current Node Allocatable Memory Limits.
    // Pod Evictions are expected to bring down memory usage to below Node Allocatable limits.
    // Until evictions happen retry cgroup updates.
    // Update limits on non root cgroup-root to be safe since the default limits for CPU can be too low.
    // Check if cgroupRoot is set to a non-empty value (empty would be the root container)
    if len(cm.cgroupRoot) > 0 {
        go func() {
            for {
                err := cm.cgroupManager.Update(cgroupConfig)
                if err == nil {
                    cm.recorder.Event(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated Node Allocatable limit across pods")
                    return
                }
                message := fmt.Sprintf("Failed to update Node Allocatable Limits %q: %v", cm.cgroupRoot, err)
                cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
                time.Sleep(time.Minute)
            }
        }()
    }
    // Now apply kube reserved and system reserved limits if required.
    if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) {
        klog.V(2).InfoS("Enforcing system reserved on cgroup", "cgroupName", nc.SystemReservedCgroupName, "limits", nc.SystemReserved)
        if err := cm.enforceExistingCgroup(nc.SystemReservedCgroupName, nc.SystemReserved, false); err != nil {
            message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
            return errors.New(message)
        }
        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on system reserved cgroup %v", nc.SystemReservedCgroupName)
    }
    if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) {
        klog.V(2).InfoS("Enforcing kube reserved on cgroup", "cgroupName", nc.KubeReservedCgroupName, "limits", nc.KubeReserved)
        if err := cm.enforceExistingCgroup(nc.KubeReservedCgroupName, nc.KubeReserved, false); err != nil {
            message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
            return errors.New(message)
        }
        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
    }

    if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedCompressibleEnforcementKey) {
        klog.V(2).InfoS("Enforcing system reserved compressible on cgroup", "cgroupName", nc.SystemReservedCgroupName, "limits", nc.SystemReserved)
        if err := cm.enforceExistingCgroup(nc.SystemReservedCgroupName, nc.SystemReserved, true); err != nil {
            message := fmt.Sprintf("Failed to enforce System Reserved Compressible Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
            return errors.New(message)
        }
        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on system reserved cgroup %v", nc.SystemReservedCgroupName)
    }

    if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedCompressibleEnforcementKey) {
        klog.V(2).InfoS("Enforcing kube reserved compressible on cgroup", "cgroupName", nc.KubeReservedCgroupName, "limits", nc.KubeReserved)
        if err := cm.enforceExistingCgroup(nc.KubeReservedCgroupName, nc.KubeReserved, true); err != nil {
            message := fmt.Sprintf("Failed to enforce Kube Reserved Compressible Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
            return errors.New(message)
        }
        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
    }
    return nil
}

// enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
func (cm *containerManagerImpl) enforceExistingCgroup(cNameStr string, rl v1.ResourceList, compressibleResources bool) error {
    cName := cm.cgroupManager.CgroupName(cNameStr)
    rp := cm.getCgroupConfig(rl, compressibleResources)
    if rp == nil {
        return fmt.Errorf("%q cgroup is not configured properly", cName)
    }

    // Enforce MemoryQoS for cgroups of kube-reserved/system-reserved. For more information,
    // see https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2570-memory-qos
    if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) {
        if rp.Memory != nil {
            if rp.Unified == nil {
                rp.Unified = make(map[string]string)
            }
            rp.Unified[Cgroup2MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
        }
    }

    cgroupConfig := &CgroupConfig{
        Name:               cName,
        ResourceParameters: rp,
    }
    klog.V(4).InfoS("Enforcing limits on cgroup", "cgroupName", cName, "cpuShares", cgroupConfig.ResourceParameters.CPUShares, "memory", cgroupConfig.ResourceParameters.Memory, "pidsLimit", cgroupConfig.ResourceParameters.PidsLimit)
    if err := cm.cgroupManager.Validate(cgroupConfig.Name); err != nil {
        return err
    }
    if err := cm.cgroupManager.Update(cgroupConfig); err != nil {
        return err
    }
    return nil
}

// getCgroupConfig returns a ResourceConfig object that can be used to create or update cgroups via the CgroupManager interface.
func (cm *containerManagerImpl) getCgroupConfig(rl v1.ResourceList, compressibleResourcesOnly bool) *ResourceConfig {
    rc := getCgroupConfigInternal(rl, compressibleResourcesOnly)
    if rc == nil {
        return nil
    }

    // In the case of a None policy, cgroupv2 and systemd cgroup manager, we must make sure systemd is aware of the cpuset cgroup.
    // By default, systemd will not create it, as we've not chosen to delegate it, and we haven't included it in the Apply() request.
    // However, this causes a bug where kubelet restarts unnecessarily (cpuset cgroup is created in the cgroupfs, but systemd
    // doesn't know about it and deletes it, and then kubelet doesn't continue because the cgroup isn't configured as expected).
    // An alternative is to delegate the `cpuset` cgroup to the kubelet, but that would require some plumbing in libcontainer,
    // and this is sufficient.
    // Only do so on None policy, as Static policy will do its own updating of the cpuset.
    // Please see the comment on policy none's GetAllocatableCPUs
    if cm.cpuManager.GetAllocatableCPUs().IsEmpty() {
        rc.CPUSet = cm.cpuManager.GetAllCPUs()
    }

    return rc
}

// getCgroupConfigInternal contains the pieces of getCgroupConfig that don't require the cm object.
// It exists so the logic can be unit tested without needing to create a full containerManager.
func getCgroupConfigInternal(rl v1.ResourceList, compressibleResourcesOnly bool) *ResourceConfig {
    // TODO(vishh): Set CPU Quota if necessary.
    if rl == nil {
        return nil
    }
    var rc ResourceConfig

    setCompressibleResources := func() {
        if q, exists := rl[v1.ResourceCPU]; exists {
            // CPU is defined in milli-cores.
            val := MilliCPUToShares(q.MilliValue())
            rc.CPUShares = &val
        }
    }

    // Only return compressible resources
    if compressibleResourcesOnly {
        setCompressibleResources()
    } else {
        if q, exists := rl[v1.ResourceMemory]; exists {
            // Memory is defined in bytes.
            val := q.Value()
            rc.Memory = &val
        }

        setCompressibleResources()

        if q, exists := rl[pidlimit.PIDs]; exists {
            val := q.Value()
            rc.PidsLimit = &val
        }
        rc.HugePageLimit = HugePageLimits(rl)
    }
    return &rc
}

// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
// Note that not all resources that are available on the node are included in the returned list of resources.
// Returns a ResourceList.
func (cm *containerManagerImpl) GetNodeAllocatableAbsolute() v1.ResourceList {
    return cm.getNodeAllocatableAbsoluteImpl(cm.capacity)
}

func (cm *containerManagerImpl) getNodeAllocatableAbsoluteImpl(capacity v1.ResourceList) v1.ResourceList {
    result := make(v1.ResourceList)
    for k, v := range capacity {
        value := v.DeepCopy()
        if cm.NodeConfig.SystemReserved != nil {
            value.Sub(cm.NodeConfig.SystemReserved[k])
        }
        if cm.NodeConfig.KubeReserved != nil {
            value.Sub(cm.NodeConfig.KubeReserved[k])
        }
        if value.Sign() < 0 {
            // Negative Allocatable resources don't make sense.
            value.Set(0)
        }
        result[k] = value
    }
    return result
}

// getNodeAllocatableInternalAbsolute is similar to getNodeAllocatableAbsolute except that
// it also includes internal resources (currently process IDs). It is intended for setting
// up top level cgroups only.
func (cm *containerManagerImpl) getNodeAllocatableInternalAbsolute() v1.ResourceList {
    return cm.getNodeAllocatableAbsoluteImpl(cm.internalCapacity)
}

// GetNodeAllocatableReservation returns the amount of compute and storage resources that has to be reserved on this node from scheduling.
func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
    evictionReservation := hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
    result := make(v1.ResourceList)
    for k := range cm.capacity {
        value := resource.NewQuantity(0, resource.DecimalSI)
        if cm.NodeConfig.SystemReserved != nil {
            value.Add(cm.NodeConfig.SystemReserved[k])
        }
        if cm.NodeConfig.KubeReserved != nil {
            value.Add(cm.NodeConfig.KubeReserved[k])
        }
        if evictionReservation != nil {
            value.Add(evictionReservation[k])
        }
        if !value.IsZero() {
            result[k] = *value
        }
    }
    return result
}

// validateNodeAllocatable ensures that the user specified Node Allocatable Configuration doesn't reserve more than the node capacity.
// Returns error if the configuration is invalid, nil otherwise.
func (cm *containerManagerImpl) validateNodeAllocatable() error {
    var errors []string
    nar := cm.GetNodeAllocatableReservation()
    for k, v := range nar {
        value := cm.capacity[k].DeepCopy()
        value.Sub(v)

        if value.Sign() < 0 {
            errors = append(errors, fmt.Sprintf("Resource %q has a reservation of %v but capacity of %v. Expected capacity >= reservation.", k, v, cm.capacity[k]))
        }
    }

    if len(errors) > 0 {
        return fmt.Errorf("invalid Node Allocatable configuration. %s", strings.Join(errors, " "))
    }
    return nil
}

// Using ObjectReference for events as the node may not be cached; refer to #42701 for detail.
func nodeRefFromNode(nodeName string) *v1.ObjectReference {
    return &v1.ObjectReference{
        Kind:      "Node",
        Name:      nodeName,
        UID:       types.UID(nodeName),
        Namespace: "",
    }
}
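The allocatable and reservation computations above reduce to per-resource subtraction and addition: the node-level cgroup is sized to capacity minus system-reserved and kube-reserved (clamped at zero), while the reservation reported for scheduling additionally includes hard-eviction thresholds. Below is a dependency-free sketch of that arithmetic, using plain int64 byte counts in place of resource.Quantity and made-up example values; it is an illustration of the calculation, not kubelet code.

// node_allocatable_sketch.go - the arithmetic behind node allocatable, with example numbers.
package main

import "fmt"

// allocatable mirrors getNodeAllocatableAbsoluteImpl for a single resource:
// capacity minus reservations, never negative.
func allocatable(capacity, systemReserved, kubeReserved int64) int64 {
    v := capacity - systemReserved - kubeReserved
    if v < 0 {
        // Negative allocatable resources don't make sense.
        v = 0
    }
    return v
}

// reservation mirrors GetNodeAllocatableReservation for a single resource:
// everything withheld from scheduling, including hard-eviction thresholds.
func reservation(systemReserved, kubeReserved, hardEviction int64) int64 {
    return systemReserved + kubeReserved + hardEviction
}

func main() {
    const gi = int64(1) << 30
    const mi = int64(1) << 20

    capacity := 16 * gi // hypothetical node memory capacity
    sysRes := 1 * gi    // --system-reserved=memory=1Gi (example value)
    kubeRes := 1 * gi   // --kube-reserved=memory=1Gi (example value)
    evict := 100 * mi   // e.g. eviction-hard memory.available<100Mi (example value)

    fmt.Printf("enforced on the node-level cgroup: %d bytes\n", allocatable(capacity, sysRes, kubeRes))
    fmt.Printf("reserved from scheduling:          %d bytes\n", reservation(sysRes, kubeRes, evict))
}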
Some files were not shown because too many files have changed in this diff