rebase: update replaced k8s.io modules to v0.33.0

Signed-off-by: Niels de Vos <ndevos@ibm.com>
Niels de Vos
2025-05-07 13:13:33 +02:00
committed by mergify[bot]
parent dd77e72800
commit 107407b44b
1723 changed files with 65035 additions and 175239 deletions

View File

@ -6,6 +6,7 @@ approvers:
- derekwaynecarr
- yujuhong
- klueska
- ffromani
reviewers:
- sig-node-reviewers
emeritus_approvers:

View File

@ -25,11 +25,10 @@ import (
"sync"
"time"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
libcontainercgroupmanager "github.com/opencontainers/runc/libcontainer/cgroups/manager"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
libcontainercgroups "github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/fscommon"
libcontainercgroupmanager "github.com/opencontainers/cgroups/manager"
cgroupsystemd "github.com/opencontainers/cgroups/systemd"
"k8s.io/klog/v2"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@ -195,14 +194,14 @@ func (m *cgroupCommon) buildCgroupPaths(name CgroupName) map[string]string {
}
// libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config.
func (m *cgroupCommon) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup {
config := &libcontainerconfigs.Cgroup{
func (m *cgroupCommon) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainercgroups.Cgroup {
config := &libcontainercgroups.Cgroup{
Systemd: m.useSystemd,
}
if needResources {
config.Resources = m.toResources(in.ResourceParameters)
} else {
config.Resources = &libcontainerconfigs.Resources{}
config.Resources = &libcontainercgroups.Resources{}
}
if !config.Systemd {
@ -279,8 +278,8 @@ var (
availableRootControllers sets.Set[string]
)
func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
resources := &libcontainerconfigs.Resources{
func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainercgroups.Resources {
resources := &libcontainercgroups.Resources{
SkipDevices: true,
SkipFreezeOnSet: true,
}
@ -324,7 +323,7 @@ func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainer
return resources
}
func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) {
func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainercgroups.Resources) {
// Check if hugetlb is supported.
if libcontainercgroups.IsCgroup2UnifiedMode() {
if !getSupportedUnifiedControllers().Has("hugetlb") {
@ -344,7 +343,7 @@ func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources
klog.InfoS("Invalid pageSize", "err", err)
continue
}
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainercgroups.HugepageLimit{
Pagesize: sizeString,
Limit: uint64(limit),
})
@ -355,7 +354,7 @@ func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources
if pageSizes.Has(pageSize) {
continue
}
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{
resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainercgroups.HugepageLimit{
Pagesize: pageSize,
Limit: uint64(0),
})
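
As a hedged aside (not part of the diff): the hunks above boil down to a module move, where the cgroup types formerly split across runc's libcontainer/cgroups and libcontainer/configs packages are now consumed from the standalone github.com/opencontainers/cgroups module. A minimal sketch of what a caller looks like after the rename; the paths and values are illustrative.

package main

import (
    "fmt"

    "github.com/opencontainers/cgroups"
    "github.com/opencontainers/cgroups/fscommon"
)

func main() {
    // Cgroup, Resources and HugepageLimit now come from the root cgroups
    // package instead of libcontainer/configs.
    cg := &cgroups.Cgroup{
        Parent: "/",
        Name:   "example",
        Resources: &cgroups.Resources{
            SkipDevices: true,
        },
    }
    fmt.Println(cg.Name, cgroups.IsCgroup2UnifiedMode())

    // The fscommon helpers keep the same call shape under the new import path.
    if val, err := fscommon.GetCgroupParamString("/sys/fs/cgroup", "cgroup.controllers"); err == nil {
        fmt.Println(val)
    }
}
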

View File

@ -22,8 +22,8 @@ import (
"strconv"
"strings"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
libcontainercgroups "github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/fscommon"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)

View File

@ -24,13 +24,17 @@ import (
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/cgroups/fscommon"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
)
const cgroupv2MemLimitFile string = "memory.max"
const (
cgroupv2MemLimitFile = "memory.max"
cgroupv2CpuMaxFile = "cpu.max"
cgroupv2CpuWeightFile = "cpu.weight"
)
// cgroupV2impl implements the CgroupManager interface
// for cgroup v2.
@ -100,14 +104,14 @@ func (c *cgroupV2impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName
func (c *cgroupV2impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) {
var cpuLimitStr, cpuPeriodStr string
cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max")
cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, cgroupv2CpuMaxFile)
if err != nil {
return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %w", cgroupPath, err)
return nil, fmt.Errorf("failed to read %s file for cgroup %v: %w", cgroupv2CpuMaxFile, cgroupPath, err)
}
numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr)
if errScan != nil || numItems != 2 {
return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %w",
cpuLimitAndPeriod, cgroupPath, errScan)
return nil, fmt.Errorf("failed to correctly parse content of %s file ('%s') for cgroup %v: %w",
cgroupv2CpuMaxFile, cpuLimitAndPeriod, cgroupPath, errScan)
}
cpuLimit := int64(-1)
if cpuLimitStr != Cgroup2MaxCpuLimit {
@ -120,7 +124,7 @@ func (c *cgroupV2impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, e
if errPeriod != nil {
return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %w", cgroupPath, errPeriod)
}
cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight")
cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, cgroupv2CpuWeightFile)
if errWeight != nil {
return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %w", cgroupPath, errWeight)
}
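
For reference, a small self-contained sketch of the cpu.max format the code above reads: the file holds "<quota> <period>", where quota is either an integer in microseconds or the literal "max" for unlimited. The helper name below is hypothetical.

package main

import (
    "fmt"
    "strconv"
)

// parseCPUMax is a hypothetical helper mirroring the parsing done above.
func parseCPUMax(content string) (limit int64, period uint64, err error) {
    var limitStr, periodStr string
    if n, err := fmt.Sscanf(content, "%s %s", &limitStr, &periodStr); err != nil || n != 2 {
        return 0, 0, fmt.Errorf("unexpected cpu.max content %q: %w", content, err)
    }
    limit = -1 // mirrors "no limit" when the quota field is "max"
    if limitStr != "max" {
        if limit, err = strconv.ParseInt(limitStr, 10, 64); err != nil {
            return 0, 0, err
        }
    }
    if period, err = strconv.ParseUint(periodStr, 10, 64); err != nil {
        return 0, 0, err
    }
    return limit, period, nil
}

func main() {
    fmt.Println(parseCPUMax("max 100000"))   // -1 100000 <nil>
    fmt.Println(parseCPUMax("50000 100000")) // 50000 100000 <nil>
}
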

View File

@ -31,6 +31,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apiserver/pkg/server/healthz"
internalapi "k8s.io/cri-api/pkg/apis"
"k8s.io/klog/v2"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
@ -154,6 +155,13 @@ type ContainerManager interface {
// Updates returns a channel that receives an Update when the device changed its status.
Updates() <-chan resourceupdates.Update
// PodHasExclusiveCPUs returns true if the provided pod has containers with exclusive CPUs.
// This means that at least one sidecar container or one app container has exclusive CPUs allocated.
PodHasExclusiveCPUs(pod *v1.Pod) bool
// ContainerHasExclusiveCPUs returns true if the provided container in the pod has exclusive CPUs allocated.
ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool
// Implements the PodResources Provider API
podresources.CPUsProvider
podresources.DevicesProvider
@ -161,6 +169,10 @@ type ContainerManager interface {
podresources.DynamicResourcesProvider
}
type cpuAllocationReader interface {
GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet
}
type NodeConfig struct {
NodeName types.NodeName
RuntimeCgroupsName string
@ -174,19 +186,19 @@ type NodeConfig struct {
KubeletRootDir string
ProtectKernelDefaults bool
NodeAllocatableConfig
QOSReserved map[v1.ResourceName]int64
CPUManagerPolicy string
CPUManagerPolicyOptions map[string]string
TopologyManagerScope string
CPUManagerReconcilePeriod time.Duration
ExperimentalMemoryManagerPolicy string
ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
PodPidsLimit int64
EnforceCPULimits bool
CPUCFSQuotaPeriod time.Duration
TopologyManagerPolicy string
TopologyManagerPolicyOptions map[string]string
CgroupVersion int
QOSReserved map[v1.ResourceName]int64
CPUManagerPolicy string
CPUManagerPolicyOptions map[string]string
TopologyManagerScope string
CPUManagerReconcilePeriod time.Duration
MemoryManagerPolicy string
MemoryManagerReservedMemory []kubeletconfig.MemoryReservation
PodPidsLimit int64
EnforceCPULimits bool
CPUCFSQuotaPeriod time.Duration
TopologyManagerPolicy string
TopologyManagerPolicyOptions map[string]string
CgroupVersion int
}
type NodeAllocatableConfig struct {
@ -212,6 +224,30 @@ func int64Slice(in []int) []int64 {
return out
}
func podHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod) bool {
for _, container := range pod.Spec.InitContainers {
if containerHasExclusiveCPUs(cr, pod, &container) {
return true
}
}
for _, container := range pod.Spec.Containers {
if containerHasExclusiveCPUs(cr, pod, &container) {
return true
}
}
klog.V(4).InfoS("Pod contains no container with pinned cpus", "podName", pod.Name)
return false
}
func containerHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod, container *v1.Container) bool {
exclusiveCPUs := cr.GetExclusiveCPUs(string(pod.UID), container.Name)
if !exclusiveCPUs.IsEmpty() {
klog.V(4).InfoS("Container has pinned cpus", "podName", pod.Name, "containerName", container.Name)
return true
}
return false
}
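
A minimal sketch of what satisfies the new cpuAllocationReader interface: the CPU manager is the real implementer, but any type with a matching GetExclusiveCPUs method works, which is convenient in tests. The fake below is purely illustrative.

package main

import (
    "fmt"

    "k8s.io/utils/cpuset"
)

// fakeAllocationReader is a hypothetical stand-in for the CPU manager.
type fakeAllocationReader struct {
    pinned map[string]cpuset.CPUSet // key: podUID + "/" + containerName
}

func (f fakeAllocationReader) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
    return f.pinned[podUID+"/"+containerName]
}

func main() {
    r := fakeAllocationReader{pinned: map[string]cpuset.CPUSet{
        "pod-uid-1/app": cpuset.New(2, 3),
    }}
    fmt.Println(r.GetExclusiveCPUs("pod-uid-1", "app").IsEmpty())     // false: pinned CPUs
    fmt.Println(r.GetExclusiveCPUs("pod-uid-1", "sidecar").IsEmpty()) // true: shared pool
}
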
// parsePercentage parses the percentage string to numeric value.
func parsePercentage(v string) (int64, error) {
if !strings.HasSuffix(v, "%") {

View File

@ -27,9 +27,8 @@ import (
"sync"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/manager"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/manager"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
utilpath "k8s.io/utils/path"
@ -336,10 +335,10 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
cm.topologyManager.AddHintProvider(cm.cpuManager)
cm.memoryManager, err = memorymanager.NewManager(
nodeConfig.ExperimentalMemoryManagerPolicy,
nodeConfig.MemoryManagerPolicy,
machineInfo,
cm.GetNodeAllocatableReservation(),
nodeConfig.ExperimentalMemoryManagerReservedMemory,
nodeConfig.MemoryManagerReservedMemory,
nodeConfig.KubeletRootDir,
cm.topologyManager,
)
@ -365,7 +364,8 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
enforceCPULimits: cm.EnforceCPULimits,
// cpuCFSQuotaPeriod is in microseconds. NodeConfig.CPUCFSQuotaPeriod is time.Duration (measured in nano seconds).
// Convert (cm.CPUCFSQuotaPeriod) [nanoseconds] / time.Microsecond (1000) to get cpuCFSQuotaPeriod in microseconds.
cpuCFSQuotaPeriod: uint64(cm.CPUCFSQuotaPeriod / time.Microsecond),
cpuCFSQuotaPeriod: uint64(cm.CPUCFSQuotaPeriod / time.Microsecond),
podContainerManager: cm,
}
}
return &podContainerManagerNoop{
@ -373,16 +373,24 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
}
}
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return podHasExclusiveCPUs(cm.cpuManager, pod)
}
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
}
func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
return &internalContainerLifecycleImpl{cm.cpuManager, cm.memoryManager, cm.topologyManager}
}
// Create a cgroup container manager.
func createManager(containerName string) (cgroups.Manager, error) {
cg := &configs.Cgroup{
cg := &cgroups.Cgroup{
Parent: "/",
Name: containerName,
Resources: &configs.Resources{
Resources: &cgroups.Resources{
SkipDevices: true,
},
Systemd: false,

View File

@ -195,6 +195,14 @@ func (cm *containerManagerStub) Updates() <-chan resourceupdates.Update {
return nil
}
func (cm *containerManagerStub) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return false
}
func (cm *containerManagerStub) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return false
}
func NewStubContainerManager() ContainerManager {
return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
}

View File

@ -168,10 +168,10 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
klog.InfoS("Creating memory manager")
cm.memoryManager, err = memorymanager.NewManager(
nodeConfig.ExperimentalMemoryManagerPolicy,
nodeConfig.MemoryManagerPolicy,
machineInfo,
cm.GetNodeAllocatableReservation(),
nodeConfig.ExperimentalMemoryManagerReservedMemory,
nodeConfig.MemoryManagerReservedMemory,
nodeConfig.KubeletRootDir,
cm.topologyManager,
)
@ -369,3 +369,11 @@ func (cm *containerManagerImpl) UnprepareDynamicResources(ctx context.Context, p
func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
return false
}
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return podHasExclusiveCPUs(cm.cpuManager, pod)
}
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
}

View File

@ -18,6 +18,7 @@ package containermap
import (
"fmt"
"maps"
)
// cmItem (ContainerMap ITEM) is a pair podUID, containerName
@ -36,11 +37,7 @@ func NewContainerMap() ContainerMap {
// Clone creates a deep copy of the ContainerMap
func (cm ContainerMap) Clone() ContainerMap {
ret := make(ContainerMap, len(cm))
for key, val := range cm {
ret[key] = val
}
return ret
return maps.Clone(cm)
}
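
A standalone illustration of the maps.Clone refactor above (the same simplification is applied to mapIntInt and ResourceDeviceInstances later in this commit): maps.Clone makes a shallow copy, which is exactly what the removed hand-written loop did for these value-typed maps.

package main

import (
    "fmt"
    "maps"
)

func main() {
    orig := map[string]string{"containerID-1": "pod-uid/app"}
    cp := maps.Clone(orig)
    cp["containerID-1"] = "pod-uid/sidecar"
    // The copy is independent of the original for value types like string.
    fmt.Println(orig["containerID-1"], cp["containerID-1"])
}
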
// Add adds a mapping of (containerID)->(podUID, containerName) to the ContainerMap

View File

@ -18,6 +18,7 @@ package cpumanager
import (
"fmt"
"maps"
"math"
"sort"
@ -39,11 +40,7 @@ const (
type mapIntInt map[int]int
func (m mapIntInt) Clone() mapIntInt {
cp := make(mapIntInt, len(m))
for k, v := range m {
cp[k] = v
}
return cp
return maps.Clone(m)
}
func (m mapIntInt) Keys() []int {

View File

@ -239,6 +239,8 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
return err
}
klog.V(4).InfoS("CPU manager started", "policy", m.policy.Name())
m.allocatableCPUs = m.policy.GetAllocatableCPUs(m.state)
if m.policy.Name() == string(PolicyNone) {
@ -465,7 +467,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
if cset.IsEmpty() {
// NOTE: This should not happen outside of tests.
klog.V(2).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
klog.V(2).InfoS("ReconcileState: skipping container; empty cpuset assigned", "pod", klog.KObj(pod), "containerName", container.Name)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
}

View File

@ -39,16 +39,17 @@ const (
var (
alphaOptions = sets.New[string](
DistributeCPUsAcrossNUMAOption,
AlignBySocketOption,
DistributeCPUsAcrossCoresOption,
StrictCPUReservationOption,
PreferAlignByUnCoreCacheOption,
)
betaOptions = sets.New[string](
StrictCPUReservationOption,
DistributeCPUsAcrossNUMAOption,
)
stableOptions = sets.New[string](
FullPCPUsOnlyOption,
)
stableOptions = sets.New[string]()
)
// CheckPolicyOptionAvailable verifies if the given option can be used depending on the Feature Gate Settings.
@ -66,6 +67,7 @@ func CheckPolicyOptionAvailable(option string) error {
return fmt.Errorf("CPU Manager Policy Beta-level Options not enabled, but option %q provided", option)
}
// if the option is stable, we need no CPUManagerPolicy*Options feature gate check
return nil
}

View File

@ -18,6 +18,7 @@ package cpumanager
import (
"fmt"
"strconv"
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -325,13 +326,16 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
defer func() {
if rerr != nil {
metrics.CPUManagerPinningErrorsTotal.Inc()
if p.options.FullPhysicalCPUsOnly {
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
}
return
}
if !p.options.FullPhysicalCPUsOnly {
// TODO: move in updateMetricsOnAllocate
if p.options.FullPhysicalCPUsOnly {
// increment only if we know we allocate aligned resources
return
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
}
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
}()
if p.options.FullPhysicalCPUsOnly {
@ -367,8 +371,8 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
}
}
}
if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
p.updateCPUsToReuse(pod, container, cpuset)
if cset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
p.updateCPUsToReuse(pod, container, cset)
klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name)
return nil
}
@ -378,16 +382,17 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint)
// Allocate CPUs according to the NUMA affinity contained in the hint.
cpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)])
cpuAllocation, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)])
if err != nil {
klog.ErrorS(err, "Unable to allocate CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs)
return err
}
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
p.updateCPUsToReuse(pod, container, cpuset)
p.updateMetricsOnAllocate(cpuset)
s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs)
p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs)
p.updateMetricsOnAllocate(s, cpuAllocation)
klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuAllocation.CPUs.String())
return nil
}
@ -412,18 +417,19 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
// Mutate the shared pool, adding released cpus.
toRelease = toRelease.Difference(cpusInUse)
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
p.updateMetricsOnRelease(toRelease)
p.updateMetricsOnRelease(s, toRelease)
}
return nil
}
func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (cpuset.CPUSet, error) {
func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (topology.Allocation, error) {
klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity)
allocatableCPUs := p.GetAvailableCPUs(s).Union(reusableCPUs)
// If there are aligned CPUs in numaAffinity, attempt to take those first.
result := cpuset.New()
result := topology.EmptyAllocation()
if numaAffinity != nil {
alignedCPUs := p.getAlignedCPUs(numaAffinity, allocatableCPUs)
@ -432,30 +438,33 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit
numAlignedToAlloc = numCPUs
}
alignedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc)
allocatedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc)
if err != nil {
return cpuset.New(), err
return topology.EmptyAllocation(), err
}
result = result.Union(alignedCPUs)
result.CPUs = result.CPUs.Union(allocatedCPUs)
}
// Get any remaining CPUs from what's leftover after attempting to grab aligned ones.
remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result), numCPUs-result.Size())
remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result.CPUs), numCPUs-result.CPUs.Size())
if err != nil {
return cpuset.New(), err
return topology.EmptyAllocation(), err
}
result = result.Union(remainingCPUs)
result.CPUs = result.CPUs.Union(remainingCPUs)
result.Aligned = p.topology.CheckAlignment(result.CPUs)
// Remove allocated CPUs from the shared CPUSet.
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result))
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result.CPUs))
klog.InfoS("AllocateCPUs", "result", result)
klog.InfoS("AllocateCPUs", "result", result.String())
return result, nil
}
func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int {
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
qos := v1qos.GetPodQOS(pod)
if qos != v1.PodQOSGuaranteed {
klog.V(5).InfoS("Exclusive CPU allocation skipped, pod QoS is not guaranteed", "pod", klog.KObj(pod), "containerName", container.Name, "qos", qos)
return 0
}
cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
@ -464,11 +473,19 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
// We should return this value because this is what kubelet agreed to allocate for the container
// and the value configured with runtime.
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
containerStatuses := pod.Status.ContainerStatuses
if podutil.IsRestartableInitContainer(container) {
if len(pod.Status.InitContainerStatuses) != 0 {
containerStatuses = append(containerStatuses, pod.Status.InitContainerStatuses...)
}
}
if cs, ok := podutil.GetContainerStatus(containerStatuses, container.Name); ok {
cpuQuantity = cs.AllocatedResources[v1.ResourceCPU]
}
}
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
cpuValue := cpuQuantity.Value()
if cpuValue*1000 != cpuQuantity.MilliValue() {
klog.V(5).InfoS("Exclusive CPU allocation skipped, pod requested non-integral CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpu", cpuValue)
return 0
}
// Safe downcast to do for all systems with < 2.1 billion CPUs.
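
A small worked example of the integral-CPU check above, using the quantity API directly; it shows why a fractional request keeps a container out of exclusive allocation.

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    whole := resource.MustParse("2")
    fractional := resource.MustParse("1500m")

    // Integral request: 2 * 1000 == 2000 milli-CPUs, so exclusive CPUs can be considered.
    fmt.Println(whole.Value()*1000 == whole.MilliValue()) // true

    // Fractional request: Value() rounds 1500m up to 2, and 2000 != 1500,
    // so the static policy leaves the container in the shared pool.
    fmt.Println(fractional.Value()*1000 == fractional.MilliValue()) // false
}
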
@ -740,27 +757,60 @@ func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableC
func (p *staticPolicy) initializeMetrics(s state.State) {
metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Add(0) // ensure the value exists
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(totalAssignedCPUs.Size()))
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
}
func (p *staticPolicy) updateMetricsOnAllocate(cset cpuset.CPUSet) {
ncpus := cset.Size()
func (p *staticPolicy) updateMetricsOnAllocate(s state.State, cpuAlloc topology.Allocation) {
ncpus := cpuAlloc.CPUs.Size()
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
if cpuAlloc.Aligned.UncoreCache {
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Inc()
}
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
}
func (p *staticPolicy) updateMetricsOnRelease(cset cpuset.CPUSet) {
func (p *staticPolicy) updateMetricsOnRelease(s state.State, cset cpuset.CPUSet) {
ncpus := cset.Size()
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs.Difference(cset))
}
func countExclusiveCPUs(s state.State) int {
exclusiveCPUs := 0
for _, cpuAssign := range s.GetCPUAssignments() {
for _, cset := range cpuAssign {
exclusiveCPUs += cset.Size()
func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet {
totalAssignedCPUs := cpuset.New()
for _, assignment := range s.GetCPUAssignments() {
for _, cset := range assignment {
totalAssignedCPUs = totalAssignedCPUs.Union(cset)
}
}
return totalAssignedCPUs
}
func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpuset.CPUSet) {
numaCount := make(map[int]int)
// Count CPUs allocated per NUMA node
for _, cpuID := range allocatedCPUs.UnsortedList() {
numaNode, err := topo.CPUNUMANodeID(cpuID)
if err != nil {
//NOTE: We are logging the error but it is highly unlikely to happen as the CPUset
// is already computed, evaluated and there is no room for user tampering.
klog.ErrorS(err, "Unable to determine NUMA node", "cpuID", cpuID)
}
numaCount[numaNode]++
}
// Update metric
for numaNode, count := range numaCount {
metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count))
}
return exclusiveCPUs
}

View File

@ -201,7 +201,7 @@ func (sc *stateCheckpoint) SetCPUSet(podUID string, containerName string, cset c
sc.cache.SetCPUSet(podUID, containerName, cset)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
}
}
@ -212,7 +212,7 @@ func (sc *stateCheckpoint) SetDefaultCPUSet(cset cpuset.CPUSet) {
sc.cache.SetDefaultCPUSet(cset)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}
@ -223,7 +223,7 @@ func (sc *stateCheckpoint) SetCPUAssignments(a ContainerCPUAssignments) {
sc.cache.SetCPUAssignments(a)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}
@ -234,7 +234,7 @@ func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
sc.cache.Delete(podUID, containerName)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
}
}
@ -245,6 +245,6 @@ func (sc *stateCheckpoint) ClearState() {
sc.cache.ClearState()
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}

View File

@ -0,0 +1,78 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package topology
import (
"fmt"
"k8s.io/utils/cpuset"
)
// Alignment is metadata about a cpuset allocation
type Alignment struct {
// UncoreCache is true if all the CPUs are uncore-cache aligned,
// IOW if they all share the same Uncore cache block.
// If the allocated CPU count is greater than a Uncore Group size,
// CPUs can't be uncore-aligned; otherwise, they are.
// This flag tracks alignment, not interference or lack thereof.
UncoreCache bool
}
func (ca Alignment) String() string {
return fmt.Sprintf("aligned=<uncore:%v>", ca.UncoreCache)
}
// Allocation represents a CPU set plus alignment metadata
type Allocation struct {
CPUs cpuset.CPUSet
Aligned Alignment
}
func (ca Allocation) String() string {
return ca.CPUs.String() + " " + ca.Aligned.String()
}
// EmptyAllocation returns a new zero-valued CPU allocation. Please note that
// an empty cpuset is considered aligned in every possible way
func EmptyAllocation() Allocation {
return Allocation{
CPUs: cpuset.New(),
Aligned: Alignment{
UncoreCache: true,
},
}
}
func isAlignedAtUncoreCache(topo *CPUTopology, cpuList ...int) bool {
if len(cpuList) <= 1 {
return true
}
reference, ok := topo.CPUDetails[cpuList[0]]
if !ok {
return false
}
for _, cpu := range cpuList[1:] {
info, ok := topo.CPUDetails[cpu]
if !ok {
return false
}
if info.UncoreCacheID != reference.UncoreCacheID {
return false
}
}
return true
}

View File

@ -15,4 +15,4 @@ limitations under the License.
*/
// Package topology contains helpers for the CPU manager.
package topology // import "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
package topology

View File

@ -101,6 +101,15 @@ func (topo *CPUTopology) CPUNUMANodeID(cpu int) (int, error) {
return info.NUMANodeID, nil
}
// CheckAlignment returns alignment information for the given cpuset in
// the context of the current CPU topology
func (topo *CPUTopology) CheckAlignment(cpus cpuset.CPUSet) Alignment {
cpuList := cpus.UnsortedList()
return Alignment{
UncoreCache: isAlignedAtUncoreCache(topo, cpuList...),
}
}
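
A hedged usage sketch tying the new Allocation/Alignment types to CheckAlignment; the topology value is assumed to come from cadvisor discovery as elsewhere in the CPU manager, and the helper name is illustrative.

package example

import (
    "fmt"

    "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
    "k8s.io/utils/cpuset"
)

// describeAllocation is a hypothetical helper: it wraps a chosen cpuset in an
// Allocation and records whether it fits inside one uncore-cache block.
func describeAllocation(topo *topology.CPUTopology, cpus cpuset.CPUSet) topology.Allocation {
    alloc := topology.Allocation{
        CPUs:    cpus,
        Aligned: topo.CheckAlignment(cpus),
    }
    fmt.Println(alloc.String()) // e.g. "0-3 aligned=<uncore:true>"
    return alloc
}
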
// CPUInfo contains the NUMA, socket, UncoreCache and core IDs associated with a CPU.
type CPUInfo struct {
NUMANodeID int

View File

@ -202,15 +202,12 @@ func (m *ManagerImpl) CleanupPluginDirectory(dir string) error {
if filePath == m.checkpointFile() {
continue
}
// TODO: Until the bug - https://github.com/golang/go/issues/33357 is fixed, os.stat wouldn't return the
// right mode(socket) on windows. Hence deleting the file, without checking whether
// its a socket, on windows.
stat, err := os.Lstat(filePath)
stat, err := os.Stat(filePath)
if err != nil {
klog.ErrorS(err, "Failed to stat file", "path", filePath)
continue
}
if stat.IsDir() {
if stat.IsDir() || stat.Mode()&os.ModeSocket == 0 {
continue
}
err = os.RemoveAll(filePath)
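
For reference, a minimal standalone sketch of the stricter check above: after os.Stat (which follows symlinks), only non-directory entries whose mode carries the socket bit are treated as stale plugin sockets. The path is illustrative.

package main

import (
    "fmt"
    "os"
)

func main() {
    path := "/var/lib/kubelet/device-plugins/example.sock" // illustrative path
    info, err := os.Stat(path)
    if err != nil {
        fmt.Println("stat failed:", err)
        return
    }
    if info.IsDir() || info.Mode()&os.ModeSocket == 0 {
        fmt.Println("not a plugin socket, leaving it alone")
        return
    }
    fmt.Println("would remove stale socket:", path)
}
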
@ -351,7 +348,7 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
// Loads in allocatedDevices information from disk.
err := m.readCheckpoint()
if err != nil {
klog.InfoS("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date", "err", err)
klog.ErrorS(err, "Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date")
}
return m.server.Start()
@ -453,7 +450,7 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
// should always be consistent. Otherwise, we run with the risk
// of failing to garbage collect non-existing resources or devices.
if !ok {
klog.ErrorS(nil, "Unexpected: healthyDevices and endpoints are out of sync")
klog.InfoS("Unexpected: healthyDevices and endpoints are out of sync")
}
delete(m.endpoints, resourceName)
delete(m.healthyDevices, resourceName)
@ -468,7 +465,7 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
eI, ok := m.endpoints[resourceName]
if (ok && eI.e.stopGracePeriodExpired()) || !ok {
if !ok {
klog.ErrorS(nil, "Unexpected: unhealthyDevices and endpoints are out of sync")
klog.InfoS("Unexpected: unhealthyDevices and endpoints became out of sync")
}
delete(m.endpoints, resourceName)
delete(m.unhealthyDevices, resourceName)
@ -484,7 +481,7 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
m.mutex.Unlock()
if needsUpdateCheckpoint {
if err := m.writeCheckpoint(); err != nil {
klog.ErrorS(err, "Error on writing checkpoint")
klog.ErrorS(err, "Failed to write checkpoint file")
}
}
return capacity, allocatable, deletedResources.UnsortedList()
@ -503,9 +500,10 @@ func (m *ManagerImpl) writeCheckpoint() error {
err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
if err != nil {
err2 := fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
klog.InfoS("Failed to write checkpoint file", "err", err)
klog.ErrorS(err, "Failed to write checkpoint file")
return err2
}
klog.V(4).InfoS("Checkpoint file written", "checkpoint", kubeletDeviceManagerCheckpoint)
return nil
}
@ -516,7 +514,7 @@ func (m *ManagerImpl) readCheckpoint() error {
if err != nil {
if err == errors.ErrCheckpointNotFound {
// no point in trying anything else
klog.InfoS("Failed to read data from checkpoint", "checkpoint", kubeletDeviceManagerCheckpoint, "err", err)
klog.ErrorS(err, "Failed to read data from checkpoint", "checkpoint", kubeletDeviceManagerCheckpoint)
return nil
}
return err
@ -534,6 +532,8 @@ func (m *ManagerImpl) readCheckpoint() error {
m.unhealthyDevices[resource] = sets.New[string]()
m.endpoints[resource] = endpointInfo{e: newStoppedEndpointImpl(resource), opts: nil}
}
klog.V(4).InfoS("Read data from checkpoint file", "checkpoint", kubeletDeviceManagerCheckpoint)
return nil
}
@ -596,7 +596,7 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
// running, then it can only be a kubelet restart. On node reboot the runtime and the containers were also shut down. Then, if the container was running, it can only be
// because it already has access to all the required devices, so we got nothing to do and we can bail out.
if !m.sourcesReady.AllReady() && m.isContainerAlreadyRunning(podUID, contName) {
klog.V(3).InfoS("container detected running, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", podUID, "containerName", contName)
klog.V(3).InfoS("Container detected running, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", podUID, "containerName", contName)
return nil, nil
}
@ -627,7 +627,7 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
// We handled the known error paths in scenario 3 (node reboot), so from now on we can fall back in a common path.
// We cover container restart on kubelet steady state with the same flow.
if needed == 0 {
klog.V(3).InfoS("no devices needed, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", podUID, "containerName", contName)
klog.V(3).InfoS("No devices needed, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", podUID, "containerName", contName)
// No change, no work.
return nil, nil
}
@ -836,7 +836,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
for k, v := range container.Resources.Limits {
resource := string(k)
needed := int(v.Value())
klog.V(3).InfoS("Looking for needed resources", "needed", needed, "resourceName", resource)
klog.V(3).InfoS("Looking for needed resources", "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name, "needed", needed)
if !m.isDevicePluginResource(resource) {
continue
}
@ -882,7 +882,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
devs := allocDevices.UnsortedList()
// TODO: refactor this part of code to just append a ContainerAllocationRequest
// in a passed in AllocateRequest pointer, and issues a single Allocate call per pod.
klog.V(3).InfoS("Making allocation request for device plugin", "devices", devs, "resourceName", resource)
klog.V(4).InfoS("Making allocation request for device plugin", "devices", devs, "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name)
resp, err := eI.e.allocate(devs)
metrics.DevicePluginAllocationDuration.WithLabelValues(resource).Observe(metrics.SinceInSeconds(startRPCTime))
if err != nil {
@ -952,7 +952,7 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
}
if !m.checkPodActive(pod) {
klog.ErrorS(nil, "pod deleted from activePods, skip to reAllocate", "podUID", podUID)
klog.V(5).InfoS("Pod deleted from activePods, skip to reAllocate", "pod", klog.KObj(pod), "podUID", podUID, "containerName", container.Name)
continue
}
@ -984,7 +984,7 @@ func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource s
if eI.opts == nil || !eI.opts.PreStartRequired {
m.mutex.Unlock()
klog.V(4).InfoS("Plugin options indicate to skip PreStartContainer for resource", "resourceName", resource)
klog.V(5).InfoS("Plugin options indicate to skip PreStartContainer for resource", "podUID", podUID, "resourceName", resource, "containerName", contName)
return nil
}
@ -1014,12 +1014,12 @@ func (m *ManagerImpl) callGetPreferredAllocationIfAvailable(podUID, contName, re
}
if eI.opts == nil || !eI.opts.GetPreferredAllocationAvailable {
klog.V(4).InfoS("Plugin options indicate to skip GetPreferredAllocation for resource", "resourceName", resource)
klog.V(5).InfoS("Plugin options indicate to skip GetPreferredAllocation for resource", "resourceName", resource, "podUID", podUID, "containerName", contName)
return nil, nil
}
m.mutex.Unlock()
klog.V(4).InfoS("Issuing a GetPreferredAllocation call for container", "containerName", contName, "podUID", podUID)
klog.V(4).InfoS("Issuing a GetPreferredAllocation call for container", "resourceName", resource, "containerName", contName, "podUID", podUID)
resp, err := eI.e.getPreferredAllocation(available.UnsortedList(), mustInclude.UnsortedList(), size)
m.mutex.Lock()
if err != nil {
@ -1167,7 +1167,7 @@ func (m *ManagerImpl) ShouldResetExtendedResourceCapacity() bool {
func (m *ManagerImpl) isContainerAlreadyRunning(podUID, cntName string) bool {
cntID, err := m.containerMap.GetContainerID(podUID, cntName)
if err != nil {
klog.V(4).InfoS("container not found in the initial map, assumed NOT running", "podUID", podUID, "containerName", cntName, "err", err)
klog.ErrorS(err, "Container not found in the initial map, assumed NOT running", "podUID", podUID, "containerName", cntName)
return false
}
@ -1175,11 +1175,11 @@ func (m *ManagerImpl) isContainerAlreadyRunning(podUID, cntName string) bool {
// so on kubelet restart containers will again fail admission, hitting https://github.com/kubernetes/kubernetes/issues/118559 again.
// This scenario should however be rare enough.
if !m.containerRunningSet.Has(cntID) {
klog.V(4).InfoS("container not present in the initial running set", "podUID", podUID, "containerName", cntName, "containerID", cntID)
klog.V(4).InfoS("Container not present in the initial running set", "podUID", podUID, "containerName", cntName, "containerID", cntID)
return false
}
// Once we make it here we know we have a running container.
klog.V(4).InfoS("container found in the initial set, assumed running", "podUID", podUID, "containerName", cntName, "containerID", cntID)
klog.V(4).InfoS("Container found in the initial set, assumed running", "podUID", podUID, "containerName", cntName, "containerID", cntID)
return true
}

View File

@ -106,6 +106,8 @@ func (c *client) Disconnect() error {
}
c.mutex.Unlock()
c.handler.PluginDisconnected(c.resource)
klog.V(2).InfoS("Device plugin disconnected", "resource", c.resource)
return nil
}

View File

@ -43,8 +43,8 @@ func (s *server) RegisterPlugin(pluginName string, endpoint string, versions []s
return s.connectClient(pluginName, endpoint)
}
func (s *server) DeRegisterPlugin(pluginName string) {
klog.V(2).InfoS("Deregistering plugin", "plugin", pluginName)
func (s *server) DeRegisterPlugin(pluginName, endpoint string) {
klog.V(2).InfoS("Deregistering plugin", "plugin", pluginName, "endpoint", endpoint)
client := s.getClient(pluginName)
if client != nil {
s.disconnectClient(pluginName, client)
@ -62,6 +62,7 @@ func (s *server) ValidatePlugin(pluginName string, endpoint string, versions []s
return fmt.Errorf("invalid name of device plugin socket: %s", fmt.Sprintf(errInvalidResourceName, pluginName))
}
klog.V(2).InfoS("Device plugin validated", "plugin", pluginName, "endpoint", endpoint, "versions", versions)
return nil
}
@ -75,6 +76,7 @@ func (s *server) connectClient(name string, socketPath string) error {
return err
}
klog.V(2).InfoS("Connected to new client", "resource", name)
go func() {
s.runClient(name, c)
}()
@ -86,7 +88,6 @@ func (s *server) disconnectClient(name string, c Client) error {
s.deregisterClient(name)
return c.Disconnect()
}
func (s *server) registerClient(name string, c Client) {
s.mutex.Lock()
defer s.mutex.Unlock()
@ -112,7 +113,7 @@ func (s *server) runClient(name string, c Client) {
}
if err := s.disconnectClient(name, c); err != nil {
klog.V(2).InfoS("Unable to disconnect client", "resource", name, "client", c, "err", err)
klog.ErrorS(err, "Unable to disconnect client", "resource", name, "client", c)
}
}

View File

@ -91,7 +91,7 @@ func (s *server) Start() error {
if selinux.GetEnabled() {
if err := selinux.SetFileLabel(s.socketDir, config.KubeletPluginsDirSELinuxLabel); err != nil {
klog.InfoS("Unprivileged containerized plugins might not work. Could not set selinux context on socket dir", "path", s.socketDir, "err", err)
klog.ErrorS(err, "Unprivileged containerized plugins might not work. Could not set selinux context on socket dir", "path", s.socketDir)
}
}
@ -128,7 +128,7 @@ func (s *server) Start() error {
func (s *server) Stop() error {
s.visitClients(func(r string, c Client) {
if err := s.disconnectClient(r, c); err != nil {
klog.InfoS("Error disconnecting device plugin client", "resourceName", r, "err", err)
klog.ErrorS(err, "Failed to disconnect device plugin client", "resourceName", r)
}
})
@ -145,6 +145,7 @@ func (s *server) Stop() error {
// During kubelet termination, we do not need the registration server,
// and we consider the kubelet to be healthy even when it is down.
s.setHealthy()
klog.V(2).InfoS("Stopping device plugin registration server")
return nil
}
@ -159,18 +160,18 @@ func (s *server) Register(ctx context.Context, r *api.RegisterRequest) (*api.Emp
if !s.isVersionCompatibleWithPlugin(r.Version) {
err := fmt.Errorf(errUnsupportedVersion, r.Version, api.SupportedVersions)
klog.InfoS("Bad registration request from device plugin with resource", "resourceName", r.ResourceName, "err", err)
klog.ErrorS(err, "Bad registration request from device plugin with resource", "resourceName", r.ResourceName)
return &api.Empty{}, err
}
if !v1helper.IsExtendedResourceName(core.ResourceName(r.ResourceName)) {
err := fmt.Errorf(errInvalidResourceName, r.ResourceName)
klog.InfoS("Bad registration request from device plugin", "err", err)
klog.ErrorS(err, "Bad registration request from device plugin")
return &api.Empty{}, err
}
if err := s.connectClient(r.ResourceName, filepath.Join(s.socketDir, r.Endpoint)); err != nil {
klog.InfoS("Error connecting to device plugin client", "err", err)
klog.ErrorS(err, "Error connecting to device plugin client")
return &api.Empty{}, err
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package devicemanager
import (
"maps"
"sync"
"k8s.io/klog/v2"
@ -429,10 +430,7 @@ func NewResourceDeviceInstances() ResourceDeviceInstances {
func (rdev ResourceDeviceInstances) Clone() ResourceDeviceInstances {
clone := NewResourceDeviceInstances()
for resourceName, resourceDevs := range rdev {
clone[resourceName] = make(map[string]pluginapi.Device)
for devID, dev := range resourceDevs {
clone[resourceName][devID] = dev
}
clone[resourceName] = maps.Clone(resourceDevs)
}
return clone
}

View File

@ -43,7 +43,7 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
for resource, requested := range accumulatedResourceRequests {
// Only consider devices that actually contain topology information.
if aligned := m.deviceHasTopologyAlignment(resource); !aligned {
klog.InfoS("Resource does not have a topology preference", "resource", resource)
klog.InfoS("Resource does not have a topology preference", "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name, "request", requested)
deviceHints[resource] = nil
continue
}
@ -54,11 +54,11 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
allocated := m.podDevices.containerDevices(string(pod.UID), container.Name, resource)
if allocated.Len() > 0 {
if allocated.Len() != requested {
klog.ErrorS(nil, "Resource already allocated to pod with different number than request", "resource", resource, "pod", klog.KObj(pod), "containerName", container.Name, "request", requested, "allocated", allocated.Len())
klog.InfoS("Resource already allocated to pod with different number than request", "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name, "request", requested, "allocated", allocated.Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resource", resource, "pod", klog.KObj(pod), "containerName", container.Name)
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name)
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.Set[string]{}, requested)
continue
}
@ -67,7 +67,7 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
available := m.getAvailableDevices(resource)
reusable := m.devicesToReuse[string(pod.UID)][resource]
if available.Union(reusable).Len() < requested {
klog.ErrorS(nil, "Unable to generate topology hints: requested number of devices unavailable", "resource", resource, "request", requested, "available", available.Union(reusable).Len())
klog.InfoS("Unable to generate topology hints: requested number of devices unavailable", "resourceName", resource, "pod", klog.KObj(pod), "containerName", container.Name, "request", requested, "available", available.Union(reusable).Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}
@ -94,7 +94,7 @@ func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymana
for resource, requested := range accumulatedResourceRequests {
// Only consider devices that actually contain topology information.
if aligned := m.deviceHasTopologyAlignment(resource); !aligned {
klog.InfoS("Resource does not have a topology preference", "resource", resource)
klog.InfoS("Resource does not have a topology preference", "resourceName", resource, "pod", klog.KObj(pod), "request", requested)
deviceHints[resource] = nil
continue
}
@ -105,11 +105,11 @@ func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymana
allocated := m.podDevices.podDevices(string(pod.UID), resource)
if allocated.Len() > 0 {
if allocated.Len() != requested {
klog.ErrorS(nil, "Resource already allocated to pod with different number than request", "resource", resource, "pod", klog.KObj(pod), "request", requested, "allocated", allocated.Len())
klog.InfoS("Resource already allocated to pod with different number than request", "resourceName", resource, "pod", klog.KObj(pod), "request", requested, "allocated", allocated.Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resource", resource, "pod", klog.KObj(pod))
klog.InfoS("Regenerating TopologyHints for resource already allocated to pod", "resourceName", resource, "pod", klog.KObj(pod), "allocated", allocated.Len())
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.Set[string]{}, requested)
continue
}
@ -117,7 +117,7 @@ func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymana
// Get the list of available devices, for which TopologyHints should be generated.
available := m.getAvailableDevices(resource)
if available.Len() < requested {
klog.ErrorS(nil, "Unable to generate topology hints: requested number of devices unavailable", "resource", resource, "request", requested, "available", available.Len())
klog.InfoS("Unable to generate topology hints: requested number of devices unavailable", "resourceName", resource, "pod", klog.KObj(pod), "request", requested, "available", available.Len())
deviceHints[resource] = []topologymanager.TopologyHint{}
continue
}

View File

@ -18,4 +18,4 @@ limitations under the License.
// to manage containers. For example, they contain functions to configure containers' cgroups,
// ensure containers run with the desired QoS, and allocate compute resources like cpus, memory,
// devices...
package cm // import "k8s.io/kubernetes/pkg/kubelet/cm"
package cm

View File

@ -98,7 +98,20 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, n
}
func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
return cache.PluginHandler(dra.NewRegistrationHandler(m.kubeClient, m.getNode))
// The time that DRA drivers have to come back after being unregistered
// before the kubelet removes their ResourceSlices.
//
// This must be long enough to actually allow stopping a pod and
// starting the replacement (otherwise ResourceSlices get deleted
// unnecessarily) and not too long (otherwise the time window where
// pods might still get scheduled to the node after removal of a
// driver is too long).
//
// 30 seconds might be long enough for a simple container restart.
// If a DRA driver wants to be sure that slices don't get wiped,
// it should use rolling updates.
wipingDelay := 30 * time.Second
return cache.PluginHandler(dra.NewRegistrationHandler(m.kubeClient, m.getNode, wipingDelay))
}
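
A hedged wiring sketch of the new three-argument constructor with the 30-second delay chosen above; kubeClient and getNode are assumed to be supplied by the DRA manager as in this file, and the import path reflects the vendored tree.

package main

import (
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/client-go/kubernetes"
    draplugin "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
)

// newDRAHandler shows the shape of the call only; the real wiring lives in the
// DRA manager above.
func newDRAHandler(client kubernetes.Interface, getNode func() (*v1.Node, error)) *draplugin.RegistrationHandler {
    // 30s gives a restarting driver time to re-register before its
    // ResourceSlices are wiped.
    return draplugin.NewRegistrationHandler(client, getNode, 30*time.Second)
}
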
// Start starts the reconcile loop of the manager.

View File

@ -18,13 +18,16 @@ package plugin
import (
"errors"
"fmt"
"slices"
"sync"
)
// PluginsStore holds a list of DRA Plugins.
type pluginsStore struct {
sync.RWMutex
store map[string]*Plugin
// plugin name -> Plugin in the order in which they got added
store map[string][]*Plugin
}
// draPlugins map keeps track of all registered DRA plugins on the node
@ -37,43 +40,57 @@ func (s *pluginsStore) get(pluginName string) *Plugin {
s.RLock()
defer s.RUnlock()
return s.store[pluginName]
instances := s.store[pluginName]
if len(instances) == 0 {
return nil
}
// Heuristic: pick the most recent one. It's most likely
// the newest, except when kubelet got restarted and registered
// all running plugins in random order.
return instances[len(instances)-1]
}
// Set lets you save a DRA Plugin to the list and give it a specific name.
// This method is protected by a mutex.
func (s *pluginsStore) add(p *Plugin) (replacedPlugin *Plugin, replaced bool) {
func (s *pluginsStore) add(p *Plugin) error {
s.Lock()
defer s.Unlock()
if s.store == nil {
s.store = make(map[string]*Plugin)
s.store = make(map[string][]*Plugin)
}
replacedPlugin, exists := s.store[p.name]
s.store[p.name] = p
if replacedPlugin != nil && replacedPlugin.cancel != nil {
replacedPlugin.cancel(errors.New("plugin got replaced"))
for _, oldP := range s.store[p.name] {
if oldP.endpoint == p.endpoint {
// One plugin instance cannot hijack the endpoint of another instance.
return fmt.Errorf("endpoint %s already registered for plugin %s", p.endpoint, p.name)
}
}
return replacedPlugin, exists
s.store[p.name] = append(s.store[p.name], p)
return nil
}
// Delete lets you delete a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) delete(pluginName string) *Plugin {
// remove lets you remove one endpoint for a DRA Plugin.
// This method is protected by a mutex. It returns the
// plugin if found and true if that was the last instance
func (s *pluginsStore) remove(pluginName, endpoint string) (*Plugin, bool) {
s.Lock()
defer s.Unlock()
p, exists := s.store[pluginName]
if !exists {
return nil
instances := s.store[pluginName]
i := slices.IndexFunc(instances, func(p *Plugin) bool { return p.endpoint == endpoint })
if i == -1 {
return nil, false
}
p := instances[i]
last := len(instances) == 1
if last {
delete(s.store, pluginName)
} else {
s.store[pluginName] = slices.Delete(instances, i, i+1)
}
if p.cancel != nil {
p.cancel(errors.New("plugin got removed"))
}
delete(s.store, pluginName)
return p
return p, last
}
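
An in-package, test-style sketch of the new multi-instance bookkeeping (assuming only name and endpoint need to be set for illustration): two endpoints may coexist for one driver name during a rolling update, get returns the most recently added instance, and remove reports whether the removed endpoint was the last one.

// Illustrative snippet inside package plugin:
func exampleMultiInstance() {
    store := &pluginsStore{}
    _ = store.add(&Plugin{name: "gpu.example.com", endpoint: "/var/lib/kubelet/plugins/gpu-old.sock"})
    _ = store.add(&Plugin{name: "gpu.example.com", endpoint: "/var/lib/kubelet/plugins/gpu-new.sock"})

    // get returns the most recently registered instance.
    p := store.get("gpu.example.com") // p.endpoint == ".../gpu-new.sock"

    // Removing the old endpoint does not remove the last instance, so the
    // driver's ResourceSlices must not be wiped yet.
    _, last := store.remove("gpu.example.com", "/var/lib/kubelet/plugins/gpu-old.sock")
    _, _ = p, last // last == false
}
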

View File

@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"slices"
"sync"
"time"
v1 "k8s.io/api/core/v1"
@ -51,8 +52,22 @@ type RegistrationHandler struct {
// This is necessary because it implements APIs which don't
// provide a context.
backgroundCtx context.Context
cancel func(err error)
kubeClient kubernetes.Interface
getNode func() (*v1.Node, error)
wipingDelay time.Duration
wg sync.WaitGroup
mutex sync.Mutex
// pendingWipes maps a plugin name to a cancel function for
// wiping of that plugin's ResourceSlices. Entries get added
// in DeRegisterPlugin and checked in RegisterPlugin. If
// wiping is pending during RegisterPlugin, it gets canceled.
//
// Must use pointers to functions because the entries have to
// be comparable.
pendingWipes map[string]*context.CancelCauseFunc
}
var _ cache.PluginHandler = &RegistrationHandler{}
@ -62,12 +77,20 @@ var _ cache.PluginHandler = &RegistrationHandler{}
// Must only be called once per process because it manages global state.
// If a kubeClient is provided, then it synchronizes ResourceSlices
// with the resource information provided by plugins.
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error), wipingDelay time.Duration) *RegistrationHandler {
// The context and thus logger should come from the caller.
return newRegistrationHandler(context.TODO(), kubeClient, getNode, wipingDelay)
}
func newRegistrationHandler(ctx context.Context, kubeClient kubernetes.Interface, getNode func() (*v1.Node, error), wipingDelay time.Duration) *RegistrationHandler {
ctx, cancel := context.WithCancelCause(ctx)
handler := &RegistrationHandler{
// The context and thus logger should come from the caller.
backgroundCtx: klog.NewContext(context.TODO(), klog.LoggerWithName(klog.TODO(), "DRA registration handler")),
backgroundCtx: klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "DRA registration handler")),
cancel: cancel,
kubeClient: kubeClient,
getNode: getNode,
wipingDelay: wipingDelay,
pendingWipes: make(map[string]*context.CancelCauseFunc),
}
// When kubelet starts up, no DRA driver has registered yet. None of
@ -77,19 +100,45 @@ func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1
// to start up.
//
// This has to run in the background.
go handler.wipeResourceSlices("")
handler.wg.Add(1)
go func() {
defer handler.wg.Done()
logger := klog.LoggerWithName(klog.FromContext(handler.backgroundCtx), "startup")
ctx := klog.NewContext(handler.backgroundCtx, logger)
handler.wipeResourceSlices(ctx, 0 /* no delay */, "" /* all drivers */)
}()
return handler
}
// Stop cancels any remaining background activities and blocks until all goroutines have stopped.
func (h *RegistrationHandler) Stop() {
h.cancel(errors.New("Stop was called"))
h.wg.Wait()
}
// wipeResourceSlices deletes ResourceSlices of the node, optionally just for a specific driver.
func (h *RegistrationHandler) wipeResourceSlices(driver string) {
// Wiping will delay for a while and can be canceled by canceling the context.
func (h *RegistrationHandler) wipeResourceSlices(ctx context.Context, delay time.Duration, driver string) {
if h.kubeClient == nil {
return
}
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)
if delay != 0 {
// Before we start deleting, give the driver time to bounce back.
// Perhaps it got removed as part of a DaemonSet update and the
// replacement pod is about to start.
logger.V(4).Info("Starting to wait before wiping ResourceSlices", "delay", delay)
select {
case <-ctx.Done():
logger.V(4).Info("Aborting wiping of ResourceSlices", "reason", context.Cause(ctx))
case <-time.After(delay):
logger.V(4).Info("Starting to wipe ResourceSlices after waiting", "delay", delay)
}
}
backoff := wait.Backoff{
Duration: time.Second,
Factor: 2,
@ -148,10 +197,10 @@ func (h *RegistrationHandler) RegisterPlugin(pluginName string, endpoint string,
// into all log output related to the plugin.
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)
logger = klog.LoggerWithValues(logger, "pluginName", pluginName)
logger = klog.LoggerWithValues(logger, "pluginName", pluginName, "endpoint", endpoint)
ctx = klog.NewContext(ctx, logger)
logger.V(3).Info("Register new DRA plugin", "endpoint", endpoint)
logger.V(3).Info("Register new DRA plugin")
chosenService, err := h.validateSupportedServices(pluginName, supportedServices)
if err != nil {
@ -179,9 +228,19 @@ func (h *RegistrationHandler) RegisterPlugin(pluginName string, endpoint string,
// Storing endpoint of newly registered DRA Plugin into the map, where plugin name will be the key
// all other DRA components will be able to get the actual socket of DRA plugins by its name.
if err := draPlugins.add(pluginInstance); err != nil {
cancel(err)
// No wrapping, the error already contains details.
return err
}
if oldPlugin, replaced := draPlugins.add(pluginInstance); replaced {
logger.V(1).Info("DRA plugin already registered, the old plugin was replaced and will be forgotten by the kubelet till the next kubelet restart", "oldEndpoint", oldPlugin.endpoint)
// Now cancel any pending ResourceSlice wiping for this plugin.
// Only needs to be done once.
h.mutex.Lock()
defer h.mutex.Unlock()
if cancel := h.pendingWipes[pluginName]; cancel != nil {
(*cancel)(errors.New("new plugin instance registered"))
delete(h.pendingWipes, pluginName)
}
return nil
@ -220,16 +279,53 @@ func (h *RegistrationHandler) validateSupportedServices(pluginName string, suppo
// DeRegisterPlugin is called when a plugin has removed its socket,
// signaling it is no longer available.
func (h *RegistrationHandler) DeRegisterPlugin(pluginName string) {
if p := draPlugins.delete(pluginName); p != nil {
func (h *RegistrationHandler) DeRegisterPlugin(pluginName, endpoint string) {
if p, last := draPlugins.remove(pluginName, endpoint); p != nil {
// This logger includes endpoint and pluginName.
logger := klog.FromContext(p.backgroundCtx)
logger.V(3).Info("Deregister DRA plugin", "endpoint", p.endpoint)
logger.V(3).Info("Deregister DRA plugin", "lastInstance", last)
if !last {
return
}
// Prepare for canceling the background wiping. This needs to run
// in the context of the registration handler, the one from
// the plugin is canceled.
logger = klog.FromContext(h.backgroundCtx)
logger = klog.LoggerWithName(logger, "driver-cleanup")
logger = klog.LoggerWithValues(logger, "pluginName", pluginName)
ctx, cancel := context.WithCancelCause(h.backgroundCtx)
ctx = klog.NewContext(ctx, logger)
// Clean up the ResourceSlices for the deleted Plugin since it
// may have died without doing so itself and might never come
// back.
go h.wipeResourceSlices(pluginName)
//
// May get canceled if the plugin comes back quickly enough
// (see RegisterPlugin).
h.mutex.Lock()
defer h.mutex.Unlock()
if cancel := h.pendingWipes[pluginName]; cancel != nil {
(*cancel)(errors.New("plugin deregistered a second time"))
}
h.pendingWipes[pluginName] = &cancel
h.wg.Add(1)
go func() {
defer h.wg.Done()
defer func() {
h.mutex.Lock()
defer h.mutex.Unlock()
// Cancel our own context, but remove it from the map only if it
// is the current entry. Perhaps it already got replaced.
cancel(errors.New("wiping done"))
if h.pendingWipes[pluginName] == &cancel {
delete(h.pendingWipes, pluginName)
}
}()
h.wipeResourceSlices(ctx, h.wipingDelay, pluginName)
}()
return
}
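The hunks above give each DRA driver at most one pending ResourceSlice wipe: deregistration schedules a delayed wipe and stores its cancel function in pendingWipes, and a re-registration cancels that wipe before the delay expires. A minimal standalone sketch of the pattern, with hypothetical names rather than the kubelet's actual types:

package main

import (
    "context"
    "errors"
    "fmt"
    "sync"
    "time"
)

// cleanupScheduler is a hypothetical distillation of the pendingWipes idea:
// one delayed cleanup per key, cancellable if the key "comes back" in time.
type cleanupScheduler struct {
    mutex   sync.Mutex
    wg      sync.WaitGroup
    pending map[string]*context.CancelCauseFunc
}

func newCleanupScheduler() *cleanupScheduler {
    return &cleanupScheduler{pending: make(map[string]*context.CancelCauseFunc)}
}

// schedule runs cleanup(key) after delay unless cancel(key) is called first.
func (s *cleanupScheduler) schedule(parent context.Context, key string, delay time.Duration, cleanup func(string)) {
    ctx, cancel := context.WithCancelCause(parent)

    s.mutex.Lock()
    if old := s.pending[key]; old != nil {
        (*old)(errors.New("rescheduled"))
    }
    s.pending[key] = &cancel
    s.mutex.Unlock()

    s.wg.Add(1)
    go func() {
        defer s.wg.Done()
        defer func() {
            s.mutex.Lock()
            defer s.mutex.Unlock()
            cancel(errors.New("done"))
            // Remove the entry only if it is still ours; it may have
            // been replaced by a newer schedule in the meantime.
            if s.pending[key] == &cancel {
                delete(s.pending, key)
            }
        }()
        select {
        case <-ctx.Done():
            fmt.Println("cleanup for", key, "aborted:", context.Cause(ctx))
        case <-time.After(delay):
            cleanup(key)
        }
    }()
}

// cancel aborts a pending cleanup for key, if any.
func (s *cleanupScheduler) cancel(key string, reason error) {
    s.mutex.Lock()
    defer s.mutex.Unlock()
    if c := s.pending[key]; c != nil {
        (*c)(reason)
        delete(s.pending, key)
    }
}

func main() {
    s := newCleanupScheduler()
    s.schedule(context.Background(), "driver-a", 50*time.Millisecond, func(k string) {
        fmt.Println("wiping state for", k)
    })
    // Simulate the driver re-registering before the delay expires.
    s.cancel("driver-a", errors.New("driver re-registered"))
    s.wg.Wait()
}

Storing a pointer to the cancel function lets the goroutine later check whether its map entry is still the current one before deleting it, which is the same identity check performed in the change above.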

View File

@ -268,3 +268,11 @@ func (cm *FakeContainerManager) UpdateAllocatedResourcesStatus(pod *v1.Pod, stat
func (cm *FakeContainerManager) Updates() <-chan resourceupdates.Update {
return nil
}
func (cm *FakeContainerManager) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return false
}
func (cm *FakeContainerManager) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return false
}

View File

@ -23,7 +23,7 @@ import (
"path/filepath"
"strconv"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontainercgroups "github.com/opencontainers/cgroups"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"

View File

@ -205,6 +205,7 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
m.allocatableMemory = m.policy.GetAllocatableMemory(m.state)
klog.V(4).InfoS("memorymanager started", "policy", m.policy.Name())
return nil
}
@ -248,7 +249,7 @@ func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.
}
if numaNodes.Len() == 0 {
klog.V(5).InfoS("No allocation is available", "pod", klog.KObj(pod), "containerName", container.Name)
klog.V(5).InfoS("NUMA nodes not available for allocation", "pod", klog.KObj(pod), "containerName", container.Name)
return nil
}
@ -266,7 +267,7 @@ func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {
// Call down into the policy to assign this container memory if required.
if err := m.policy.Allocate(m.state, pod, container); err != nil {
klog.ErrorS(err, "Allocate error")
klog.ErrorS(err, "Allocate error", "pod", klog.KObj(pod), "containerName", container.Name)
return err
}
return nil
@ -280,7 +281,7 @@ func (m *manager) RemoveContainer(containerID string) error {
// if error appears it means container entry already does not exist under the container map
podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
if err != nil {
klog.InfoS("Failed to get container from container map", "containerID", containerID, "err", err)
klog.ErrorS(err, "Failed to get container from container map", "containerID", containerID)
return nil
}
@ -344,7 +345,7 @@ func (m *manager) removeStaleState() {
for podUID := range assignments {
for containerName := range assignments[podUID] {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
klog.V(2).InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
m.policyRemoveContainerByRef(podUID, containerName)
}
}
@ -352,7 +353,7 @@ func (m *manager) removeStaleState() {
m.containerMap.Visit(func(podUID, containerName, containerID string) {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
klog.V(2).InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
m.policyRemoveContainerByRef(podUID, containerName)
}
})
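Most of the memory manager hunks above only retune logging: klog.ErrorS is kept for cases that carry a real error value, routine state churn drops to V(2)/V(4)/V(5), and pod/container key-value pairs are attached for context. A small hedged sketch of those klog conventions, with made-up messages and an arbitrary verbosity setting:

package main

import (
    "errors"
    "flag"

    "k8s.io/klog/v2"
)

func main() {
    klog.InitFlags(nil)
    flag.Set("v", "4") // enable V(4) and below for this demo
    flag.Parse()
    defer klog.Flush()

    podRef := klog.KRef("default", "demo-pod") // stand-in for klog.KObj(pod)

    // Routine state changes: informational and gated by verbosity.
    klog.V(2).InfoS("RemoveStaleState removing state", "pod", podRef, "containerName", "app")
    klog.V(4).InfoS("Allocated exclusive memory", "pod", podRef, "containerName", "app")

    // Recoverable conditions without an error object stay at InfoS ...
    klog.InfoS("Memory already allocated with a different size than requested",
        "pod", podRef, "requested", 2048, "allocated", 1024)

    // ... while ErrorS is reserved for an actual error value.
    err := errors.New("invalid NUMA affinity")
    klog.ErrorS(err, "Failed to generate NUMA bitmask", "pod", podRef, "containerName", "app")
}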

View File

@ -96,7 +96,9 @@ func (p *staticPolicy) Start(s state.State) error {
// Allocate call is idempotent
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
// allocate the memory only for guaranteed pods
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
qos := v1qos.GetPodQOS(pod)
if qos != v1.PodQOSGuaranteed {
klog.V(5).InfoS("Exclusive memory allocation skipped, pod QoS is not guaranteed", "pod", klog.KObj(pod), "containerName", container.Name, "qos", qos)
return nil
}
@ -196,6 +198,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
// TODO: we should refactor our state structs to reflect the amount of the re-used memory
p.updateInitContainersMemoryBlocks(s, pod, container, containerBlocks)
klog.V(4).InfoS("Allocated exclusive memory", "pod", klog.KObj(pod), "containerName", container.Name)
return nil
}
@ -304,24 +307,24 @@ func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, re
}
if len(ctnBlocks) != len(reqRsrc) {
klog.ErrorS(nil, "The number of requested resources by the container differs from the number of memory blocks", "containerName", ctn.Name)
klog.InfoS("The number of requested resources by the container differs from the number of memory blocks", "pod", klog.KObj(pod), "containerName", ctn.Name)
return nil
}
for _, b := range ctnBlocks {
if _, ok := reqRsrc[b.Type]; !ok {
klog.ErrorS(nil, "Container requested resources do not have resource of this type", "containerName", ctn.Name, "type", b.Type)
klog.InfoS("Container requested resources but none available of this type", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type)
return nil
}
if b.Size != reqRsrc[b.Type] {
klog.ErrorS(nil, "Memory already allocated with different numbers than requested", "podUID", pod.UID, "type", b.Type, "containerName", ctn.Name, "requestedResource", reqRsrc[b.Type], "allocatedSize", b.Size)
klog.InfoS("Memory already allocated with different numbers than requested", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type, "requestedResource", reqRsrc[b.Type], "allocatedSize", b.Size)
return nil
}
containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...)
if err != nil {
klog.ErrorS(err, "Failed to generate NUMA bitmask")
klog.ErrorS(err, "Failed to generate NUMA bitmask", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type)
return nil
}
@ -447,7 +450,13 @@ func getRequestedResources(pod *v1.Pod, container *v1.Container) (map[v1.Resourc
// We should return this value because this is what kubelet agreed to allocate for the container
// and the value configured with runtime.
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
containerStatuses := pod.Status.ContainerStatuses
if podutil.IsRestartableInitContainer(container) {
if len(pod.Status.InitContainerStatuses) != 0 {
containerStatuses = append(containerStatuses, pod.Status.InitContainerStatuses...)
}
}
if cs, ok := podutil.GetContainerStatus(containerStatuses, container.Name); ok {
resources = cs.AllocatedResources
}
}
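The getRequestedResources hunk above widens the status lookup so that restartable (sidecar-style) init containers are also found in InitContainerStatuses when InPlacePodVerticalScaling reads allocated resources. A rough, self-contained approximation using only k8s.io/api/core/v1 types and a plain loop in place of the kubelet's podutil helpers (the helper names below are made up):

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
)

// isRestartableInitContainer mirrors the idea behind the kubelet helper: an
// init container with restartPolicy Always keeps running as a sidecar.
func isRestartableInitContainer(c *v1.Container) bool {
    return c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways
}

// findStatus looks up a container status by name, searching regular container
// statuses first and, for restartable init containers, the init statuses too.
func findStatus(pod *v1.Pod, container *v1.Container) (v1.ContainerStatus, bool) {
    statuses := pod.Status.ContainerStatuses
    if isRestartableInitContainer(container) {
        statuses = append(statuses, pod.Status.InitContainerStatuses...)
    }
    for _, cs := range statuses {
        if cs.Name == container.Name {
            return cs, true
        }
    }
    return v1.ContainerStatus{}, false
}

func main() {
    always := v1.ContainerRestartPolicyAlways
    sidecar := v1.Container{Name: "log-shipper", RestartPolicy: &always}
    pod := &v1.Pod{
        Status: v1.PodStatus{
            InitContainerStatuses: []v1.ContainerStatus{{Name: "log-shipper"}},
        },
    }
    if cs, ok := findStatus(pod, &sidecar); ok {
        fmt.Println("found status for", cs.Name)
    }
}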
@ -654,36 +663,36 @@ func (p *staticPolicy) validateState(s state.State) error {
func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
if len(ms1) != len(ms2) {
klog.ErrorS(nil, "Node states are different", "lengthNode1", len(ms1), "lengthNode2", len(ms2))
klog.InfoS("Node states were different", "lengthNode1", len(ms1), "lengthNode2", len(ms2))
return false
}
for nodeID, nodeState1 := range ms1 {
nodeState2, ok := ms2[nodeID]
if !ok {
klog.ErrorS(nil, "Node state does not have node ID", "nodeID", nodeID)
klog.InfoS("Node state didn't have node ID", "nodeID", nodeID)
return false
}
if nodeState1.NumberOfAssignments != nodeState2.NumberOfAssignments {
klog.ErrorS(nil, "Node states number of assignments are different", "assignment1", nodeState1.NumberOfAssignments, "assignment2", nodeState2.NumberOfAssignments)
klog.InfoS("Node state had a different number of memory assignments.", "assignment1", nodeState1.NumberOfAssignments, "assignment2", nodeState2.NumberOfAssignments)
return false
}
if !areGroupsEqual(nodeState1.Cells, nodeState2.Cells) {
klog.ErrorS(nil, "Node states groups are different", "stateNode1", nodeState1.Cells, "stateNode2", nodeState2.Cells)
klog.InfoS("Node states had different groups", "stateNode1", nodeState1.Cells, "stateNode2", nodeState2.Cells)
return false
}
if len(nodeState1.MemoryMap) != len(nodeState2.MemoryMap) {
klog.ErrorS(nil, "Node states memory map have different lengths", "lengthNode1", len(nodeState1.MemoryMap), "lengthNode2", len(nodeState2.MemoryMap))
klog.InfoS("Node state had memory maps of different lengths", "lengthNode1", len(nodeState1.MemoryMap), "lengthNode2", len(nodeState2.MemoryMap))
return false
}
for resourceName, memoryState1 := range nodeState1.MemoryMap {
memoryState2, ok := nodeState2.MemoryMap[resourceName]
if !ok {
klog.ErrorS(nil, "Memory state does not have resource", "resource", resourceName)
klog.InfoS("Memory state didn't have resource", "resource", resourceName)
return false
}
@ -701,11 +710,11 @@ func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
}
if tmpState1.Free != tmpState2.Free {
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "free", "free1", tmpState1.Free, "free2", tmpState2.Free, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
klog.InfoS("NUMA node and resource had different memory states", "node", nodeID, "resource", resourceName, "field", "free", "free1", tmpState1.Free, "free2", tmpState2.Free, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
return false
}
if tmpState1.Reserved != tmpState2.Reserved {
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "reserved", "reserved1", tmpState1.Reserved, "reserved2", tmpState2.Reserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
klog.InfoS("NUMA node and resource had different memory states", "node", nodeID, "resource", resourceName, "field", "reserved", "reserved1", tmpState1.Reserved, "reserved2", tmpState2.Reserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
return false
}
}
@ -715,17 +724,17 @@ func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
func areMemoryStatesEqual(memoryState1, memoryState2 *state.MemoryTable, nodeID int, resourceName v1.ResourceName) bool {
if memoryState1.TotalMemSize != memoryState2.TotalMemSize {
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "TotalMemSize", "TotalMemSize1", memoryState1.TotalMemSize, "TotalMemSize2", memoryState2.TotalMemSize, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "TotalMemSize", "TotalMemSize1", memoryState1.TotalMemSize, "TotalMemSize2", memoryState2.TotalMemSize, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
return false
}
if memoryState1.SystemReserved != memoryState2.SystemReserved {
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "SystemReserved", "SystemReserved1", memoryState1.SystemReserved, "SystemReserved2", memoryState2.SystemReserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "SystemReserved", "SystemReserved1", memoryState1.SystemReserved, "SystemReserved2", memoryState2.SystemReserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
return false
}
if memoryState1.Allocatable != memoryState2.Allocatable {
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "Allocatable", "Allocatable1", memoryState1.Allocatable, "Allocatable2", memoryState2.Allocatable, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "Allocatable", "Allocatable1", memoryState1.Allocatable, "Allocatable2", memoryState2.Allocatable, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
return false
}
return true

View File

@ -131,7 +131,7 @@ func (sc *stateCheckpoint) SetMachineState(memoryMap NUMANodeMap) {
sc.cache.SetMachineState(memoryMap)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}
@ -143,7 +143,7 @@ func (sc *stateCheckpoint) SetMemoryBlocks(podUID string, containerName string,
sc.cache.SetMemoryBlocks(podUID, containerName, blocks)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
}
}
@ -155,7 +155,7 @@ func (sc *stateCheckpoint) SetMemoryAssignments(assignments ContainerMemoryAssig
sc.cache.SetMemoryAssignments(assignments)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}
@ -167,7 +167,7 @@ func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
sc.cache.Delete(podUID, containerName)
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
}
}
@ -179,6 +179,6 @@ func (sc *stateCheckpoint) ClearState() {
sc.cache.ClearState()
err := sc.storeState()
if err != nil {
klog.InfoS("Store state to checkpoint error", "err", err)
klog.ErrorS(err, "Failed to store state to checkpoint")
}
}
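Every setter above has the same shape: update the in-memory cache under the lock, then persist to the checkpoint and log, rather than return, a storage failure. A generic, hypothetical sketch of that shape (plain map and closure instead of the checkpoint manager):

package main

import (
    "errors"
    "fmt"
    "sync"
)

// checkpointed pairs an in-memory cache with a persist step; storage errors
// are reported but do not roll back the cached value.
type checkpointed struct {
    sync.Mutex
    cache   map[string]string
    persist func(map[string]string) error
}

func (c *checkpointed) set(key, value string) {
    c.Lock()
    defer c.Unlock()
    c.cache[key] = value
    if err := c.persist(c.cache); err != nil {
        // The kubelet uses klog.ErrorS here; fmt keeps this sketch dependency-free.
        fmt.Println("failed to store state to checkpoint:", err)
    }
}

func main() {
    c := &checkpointed{
        cache: map[string]string{},
        persist: func(m map[string]string) error {
            if len(m) > 1 {
                return errors.New("disk full") // simulated persistence failure
            }
            return nil
        },
    }
    c.set("podA/ctr1", "blocks")
    c.set("podB/ctr1", "blocks") // triggers the simulated persist error
}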

View File

@ -94,6 +94,7 @@ func (s *stateMemory) SetMemoryAssignments(assignments ContainerMemoryAssignment
defer s.Unlock()
s.assignments = assignments.Clone()
klog.V(5).InfoS("Updated Memory assignments", "assignments", assignments)
}
// Delete deletes corresponding Blocks from ContainerMemoryAssignments

View File

@ -23,7 +23,7 @@ import (
"path"
"strings"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontainercgroups "github.com/opencontainers/cgroups"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
@ -55,6 +55,8 @@ type podContainerManagerImpl struct {
// cpuCFSQuotaPeriod is the cfs period value, cfs_period_us, setting per
// node for all containers in usec
cpuCFSQuotaPeriod uint64
// podContainerManager is the ContainerManager running on the machine
podContainerManager ContainerManager
}
// Make sure that podContainerManagerImpl implements the PodContainerManager interface
@ -73,6 +75,11 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
// check if container already exist
alreadyExists := m.Exists(pod)
if !alreadyExists {
enforceCPULimits := m.enforceCPULimits
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableCPUQuotaWithExclusiveCPUs) && m.podContainerManager.PodHasExclusiveCPUs(pod) {
klog.V(2).InfoS("Disabled CFS quota", "pod", klog.KObj(pod))
enforceCPULimits = false
}
enforceMemoryQoS := false
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
@ -82,7 +89,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
podContainerName, _ := m.GetPodContainerName(pod)
containerConfig := &CgroupConfig{
Name: podContainerName,
ResourceParameters: ResourceConfigForPod(pod, m.enforceCPULimits, m.cpuCFSQuotaPeriod, enforceMemoryQoS),
ResourceParameters: ResourceConfigForPod(pod, enforceCPULimits, m.cpuCFSQuotaPeriod, enforceMemoryQoS),
}
if m.podPidsLimit > 0 {
containerConfig.ResourceParameters.PidsLimit = &m.podPidsLimit
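The EnsureExists hunk above derives an effective per-pod CFS quota setting: the node-level default is switched off when the DisableCPUQuotaWithExclusiveCPUs gate is enabled and the pod owns exclusive CPUs. A dependency-free sketch of that decision, with plain booleans and a stub predicate standing in for the feature gate and ContainerManager.PodHasExclusiveCPUs:

package main

import "fmt"

// pod is a trimmed-down stand-in for *v1.Pod.
type pod struct {
    name             string
    hasExclusiveCPUs bool // stand-in for ContainerManager.PodHasExclusiveCPUs
}

// effectiveCPULimitEnforcement decides whether CFS quota should be applied to
// the pod-level cgroup, mirroring the shape of the change above.
func effectiveCPULimitEnforcement(nodeEnforceCPULimits, disableQuotaWithExclusiveCPUs bool, p pod) bool {
    enforce := nodeEnforceCPULimits
    if disableQuotaWithExclusiveCPUs && p.hasExclusiveCPUs {
        // Exclusive CPUs are already dedicated to the pod; throttling them
        // via CFS quota would only add latency, so the quota is skipped.
        enforce = false
    }
    return enforce
}

func main() {
    p := pod{name: "pinned-workload", hasExclusiveCPUs: true}
    fmt.Println("enforce CFS quota:", effectiveCPULimitEnforcement(true, true, p)) // false
}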

View File

@ -29,7 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
units "github.com/docker/go-units"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontainercgroups "github.com/opencontainers/cgroups"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-helpers/resource"

View File

@ -45,7 +45,7 @@ func NewFakeManagerWithHint(hint *TopologyHint) Manager {
// NewFakeManagerWithPolicy returns an instance of fake topology manager with specified policy
func NewFakeManagerWithPolicy(policy Policy) Manager {
klog.InfoS("NewFakeManagerWithPolicy")
klog.InfoS("NewFakeManagerWithPolicy", "policy", policy.Name())
return &fakeManager{
policy: policy,
}

View File

@ -47,11 +47,11 @@ func CheckPolicyOptionAvailable(option string) error {
}
if alphaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManagerPolicyAlphaOptions) {
return fmt.Errorf("Topology Manager Policy Alpha-level Options not enabled, but option %q provided", option)
return fmt.Errorf("topology manager policy alpha-level options not enabled, but option %q provided", option)
}
if betaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManagerPolicyBetaOptions) {
return fmt.Errorf("Topology Manager Policy Beta-level Options not enabled, but option %q provided", option)
return fmt.Errorf("topology manager policy beta-level options not enabled, but option %q provided", option)
}
return nil
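Besides lowercasing the error strings (the usual Go convention for error values), CheckPolicyOptionAvailable gates alpha- and beta-level options behind separate feature gates. A simplified, dependency-free sketch of the same tiering, with placeholder option names and plain booleans instead of the kubelet's featuregate machinery:

package main

import "fmt"

// Placeholder option names; the real sets live in the topology manager.
var (
    knownOptions = map[string]bool{"option-a": true, "option-b": true}
    alphaOptions = map[string]bool{"option-a": true}
    betaOptions  = map[string]bool{"option-b": true}
)

// checkPolicyOption mirrors the tiered validation above: unknown options are
// rejected outright, while alpha- and beta-level options additionally require
// their corresponding gate to be enabled.
func checkPolicyOption(option string, alphaEnabled, betaEnabled bool) error {
    if !knownOptions[option] {
        return fmt.Errorf("unknown topology manager policy option %q", option)
    }
    if alphaOptions[option] && !alphaEnabled {
        return fmt.Errorf("topology manager policy alpha-level options not enabled, but option %q provided", option)
    }
    if betaOptions[option] && !betaEnabled {
        return fmt.Errorf("topology manager policy beta-level options not enabled, but option %q provided", option)
    }
    return nil
}

func main() {
    fmt.Println(checkPolicyOption("option-a", false, true)) // alpha gate missing -> error
    fmt.Println(checkPolicyOption("option-b", false, true)) // beta gate enabled -> <nil>
}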

View File

@ -50,6 +50,9 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
if !admit {
if IsAlignmentGuaranteed(s.policy) {
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Inc()
}
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{})
}
@ -63,6 +66,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
}
if IsAlignmentGuaranteed(s.policy) {
klog.V(4).InfoS("Resource alignment at container scope guaranteed", "pod", klog.KObj(pod))
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Inc()
}
}
@ -84,6 +88,6 @@ func (s *containerScope) accumulateProvidersHints(pod *v1.Pod, container *v1.Con
func (s *containerScope) calculateAffinity(pod *v1.Pod, container *v1.Container) (TopologyHint, bool) {
providersHints := s.accumulateProvidersHints(pod, container)
bestHint, admit := s.policy.Merge(providersHints)
klog.InfoS("ContainerTopologyHint", "bestHint", bestHint)
klog.InfoS("ContainerTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
return bestHint, admit
}

View File

@ -48,6 +48,10 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
bestHint, admit := s.calculateAffinity(pod)
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
if !admit {
if IsAlignmentGuaranteed(s.policy) {
// increment only if we know we allocate aligned resources.
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Inc()
}
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{})
}
@ -64,6 +68,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
}
if IsAlignmentGuaranteed(s.policy) {
// increment only if we know we allocate aligned resources.
klog.V(4).InfoS("Resource alignment at pod scope guaranteed", "pod", klog.KObj(pod))
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Inc()
}
return admission.GetPodAdmitResult(nil)
@ -84,6 +89,6 @@ func (s *podScope) accumulateProvidersHints(pod *v1.Pod) []map[string][]Topology
func (s *podScope) calculateAffinity(pod *v1.Pod) (TopologyHint, bool) {
providersHints := s.accumulateProvidersHints(pod)
bestHint, admit := s.policy.Merge(providersHints)
klog.InfoS("PodTopologyHint", "bestHint", bestHint)
klog.InfoS("PodTopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
return bestHint, admit
}
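Both scopes follow the same flow: collect topology hints from every provider, let the policy merge them into one best hint, and admit only if that hint is satisfiable. The sketch below is a heavily simplified illustration of such a merge, with a single candidate hint per provider and NUMA affinity as a plain bitmask; it is not the kubelet's actual policy logic.

package main

import (
    "fmt"
    "math/bits"
)

// hint is a simplified topology hint: a bitmask of acceptable NUMA nodes
// plus whether the provider considers that placement preferred.
type hint struct {
    numaMask  uint64
    preferred bool
}

// merge intersects one hint per provider and reports whether the result
// still names at least one NUMA node (i.e. whether admission can succeed).
func merge(providerHints []hint) (hint, bool) {
    merged := hint{numaMask: ^uint64(0), preferred: true}
    for _, h := range providerHints {
        merged.numaMask &= h.numaMask
        merged.preferred = merged.preferred && h.preferred
    }
    admit := merged.numaMask != 0
    return merged, admit
}

func main() {
    // Provider 1 can use NUMA nodes {0,1}; provider 2 only node 0.
    best, admit := merge([]hint{
        {numaMask: 0b11, preferred: false},
        {numaMask: 0b01, preferred: true},
    })
    fmt.Printf("bestHint=%b preferred=%v admit=%v nodes=%d\n",
        best.numaMask, best.preferred, admit, bits.OnesCount64(best.numaMask))
}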

View File

@ -188,9 +188,19 @@ func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topology
scope: scope,
}
manager.initializeMetrics()
return manager, nil
}
func (m *manager) initializeMetrics() {
// ensure the values exist
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Add(0)
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Add(0)
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Add(0)
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Add(0)
}
func (m *manager) GetAffinity(podUID string, containerName string) TopologyHint {
return m.scope.GetAffinity(podUID, containerName)
}
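initializeMetrics touches every label combination once with Add(0) so the corresponding time series are exported as zero before the first admission success or failure is counted. The same technique with a plain prometheus/client_golang CounterVec, assumed here in place of the component-base metrics wrappers (metric and label names are illustrative):

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
    dto "github.com/prometheus/client_model/go"
)

var alignedFailures = prometheus.NewCounterVec(
    prometheus.CounterOpts{
        Name: "container_aligned_compute_resources_failure_count",
        Help: "Admissions rejected because aligned resources could not be allocated.",
    },
    []string{"scope", "boundary"},
)

func initializeMetrics() {
    // Touch every label combination so the series are exported as 0
    // immediately, instead of appearing only after the first increment.
    for _, scope := range []string{"container", "pod"} {
        alignedFailures.WithLabelValues(scope, "numa_node").Add(0)
    }
}

func main() {
    reg := prometheus.NewRegistry()
    reg.MustRegister(alignedFailures)
    initializeMetrics()

    m := &dto.Metric{}
    if err := alignedFailures.WithLabelValues("pod", "numa_node").Write(m); err == nil {
        fmt.Println("pod/numa_node failures:", m.GetCounter().GetValue()) // 0
    }
}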
@ -212,11 +222,13 @@ func (m *manager) RemoveContainer(containerID string) error {
}
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
klog.V(4).InfoS("Topology manager admission check", "pod", klog.KObj(attrs.Pod))
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
startTime := time.Now()
podAdmitResult := m.scope.Admit(attrs.Pod)
metrics.TopologyManagerAdmissionDuration.Observe(float64(time.Since(startTime).Milliseconds()))
klog.V(4).InfoS("Pod Admit Result", "Message", podAdmitResult.Message, "pod", klog.KObj(attrs.Pod))
return podAdmitResult
}

View File

@ -21,7 +21,7 @@ import (
libcontainerutils "k8s.io/kubernetes/third_party/forked/libcontainer/utils"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontainercgroups "github.com/opencontainers/cgroups"
)
const (