rebase: update K8s packages to v0.32.1

Update K8s packages in go.mod to v0.32.1

Signed-off-by: Praveen M <m.praveen@ibm.com>
Praveen M
2025-01-16 09:41:46 +05:30
committed by mergify[bot]
parent 5aef21ea4e
commit 7eb99fc6c9
2442 changed files with 273386 additions and 47788 deletions

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/OWNERS generated vendored Normal file

@@ -0,0 +1,2 @@
labels:
- wg/device-management


@@ -0,0 +1,222 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dra
import (
"errors"
"fmt"
"slices"
"sync"
resourceapi "k8s.io/api/resource/v1beta1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// ClaimInfo holds information required
// to prepare and unprepare a resource claim.
// +k8s:deepcopy-gen=true
type ClaimInfo struct {
state.ClaimInfoState
prepared bool
}
// claimInfoCache is a cache of processed resource claims keyed by namespace/claimName.
type claimInfoCache struct {
sync.RWMutex
checkpointer state.Checkpointer
claimInfo map[string]*ClaimInfo
}
// newClaimInfoFromClaim creates a new claim info from a resource claim.
// It verifies that the kubelet can handle the claim.
func newClaimInfoFromClaim(claim *resourceapi.ResourceClaim) (*ClaimInfo, error) {
claimInfoState := state.ClaimInfoState{
ClaimUID: claim.UID,
ClaimName: claim.Name,
Namespace: claim.Namespace,
PodUIDs: sets.New[string](),
DriverState: make(map[string]state.DriverState),
}
if claim.Status.Allocation == nil {
return nil, errors.New("not allocated")
}
for _, result := range claim.Status.Allocation.Devices.Results {
claimInfoState.DriverState[result.Driver] = state.DriverState{}
}
info := &ClaimInfo{
ClaimInfoState: claimInfoState,
prepared: false,
}
return info, nil
}
// newClaimInfoFromState creates a new claim info from a checkpointed claim info state object.
func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
info := &ClaimInfo{
ClaimInfoState: *state.DeepCopy(),
prepared: false,
}
return info
}
// addDevice adds a prepared device to the claim info under the given driver name.
func (info *ClaimInfo) addDevice(driverName string, deviceState state.Device) {
if info.DriverState == nil {
info.DriverState = make(map[string]state.DriverState)
}
driverState := info.DriverState[driverName]
driverState.Devices = append(driverState.Devices, deviceState)
info.DriverState[driverName] = driverState
}
// addPodReference adds a pod reference to the claim info.
func (info *ClaimInfo) addPodReference(podUID types.UID) {
info.PodUIDs.Insert(string(podUID))
}
// hasPodReference checks if a pod reference exists in the claim info.
func (info *ClaimInfo) hasPodReference(podUID types.UID) bool {
return info.PodUIDs.Has(string(podUID))
}
// deletePodReference deletes a pod reference from the claim info.
func (info *ClaimInfo) deletePodReference(podUID types.UID) {
info.PodUIDs.Delete(string(podUID))
}
// setPrepared marks the claim info as prepared.
func (info *ClaimInfo) setPrepared() {
info.prepared = true
}
// isPrepared checks if claim info is prepared or not.
func (info *ClaimInfo) isPrepared() bool {
return info.prepared
}
// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
checkpointer, err := state.NewCheckpointer(stateDir, checkpointName)
if err != nil {
return nil, fmt.Errorf("could not initialize checkpoint manager, please drain node and remove dra state file, err: %w", err)
}
checkpoint, err := checkpointer.GetOrCreate()
if err != nil {
return nil, fmt.Errorf("error calling GetOrCreate() on checkpoint state: %w", err)
}
cache := &claimInfoCache{
checkpointer: checkpointer,
claimInfo: make(map[string]*ClaimInfo),
}
entries, err := checkpoint.GetClaimInfoStateList()
if err != nil {
return nil, fmt.Errorf("error calling GetEntries() on checkpoint: %w", err)
}
for _, entry := range entries {
info := newClaimInfoFromState(&entry)
cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
}
return cache, nil
}
// withLock runs a function while holding the claimInfoCache lock.
func (cache *claimInfoCache) withLock(f func() error) error {
cache.Lock()
defer cache.Unlock()
return f()
}
// withRLock runs a function while holding the claimInfoCache rlock.
func (cache *claimInfoCache) withRLock(f func() error) error {
cache.RLock()
defer cache.RUnlock()
return f()
}
// add adds a new claim info object into the claim info cache.
func (cache *claimInfoCache) add(info *ClaimInfo) *ClaimInfo {
cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
return info
}
// contains checks to see if a specific claim info object is already in the cache.
func (cache *claimInfoCache) contains(claimName, namespace string) bool {
_, exists := cache.claimInfo[namespace+"/"+claimName]
return exists
}
// get gets a specific claim info object from the cache.
func (cache *claimInfoCache) get(claimName, namespace string) (*ClaimInfo, bool) {
info, exists := cache.claimInfo[namespace+"/"+claimName]
return info, exists
}
// delete deletes a specific claim info object from the cache.
func (cache *claimInfoCache) delete(claimName, namespace string) {
delete(cache.claimInfo, namespace+"/"+claimName)
}
// hasPodReference checks if there is at least one claim
// that is referenced by the pod with the given UID.
// This function is used indirectly by the status manager
// to check if a pod can enter termination status.
func (cache *claimInfoCache) hasPodReference(uid types.UID) bool {
for _, claimInfo := range cache.claimInfo {
if claimInfo.hasPodReference(uid) {
return true
}
}
return false
}
// syncToCheckpoint syncs the full claim info cache state to a checkpoint.
func (cache *claimInfoCache) syncToCheckpoint() error {
claimInfoStateList := make(state.ClaimInfoStateList, 0, len(cache.claimInfo))
for _, infoClaim := range cache.claimInfo {
claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
}
checkpoint, err := state.NewCheckpoint(claimInfoStateList)
if err != nil {
return err
}
return cache.checkpointer.Store(checkpoint)
}
// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
// When the request name is non-empty, only devices relevant for that request
// are returned.
func (info *ClaimInfo) cdiDevicesAsList(requestName string) []kubecontainer.CDIDevice {
var cdiDevices []kubecontainer.CDIDevice
for _, driverData := range info.DriverState {
for _, device := range driverData.Devices {
if requestName == "" || len(device.RequestNames) == 0 || slices.Contains(device.RequestNames, requestName) {
for _, cdiDeviceID := range device.CDIDeviceIDs {
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: cdiDeviceID})
}
}
}
}
return cdiDevices
}
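
The cache key is always namespace + "/" + ClaimName, and cdiDevicesAsList treats a device as matching when no request name is given, when the device is tied to no particular request, or when it explicitly lists the requested name. A minimal, self-contained sketch of that filtering (the types are illustrative stand-ins, not the vendored ones):

package main

import (
	"fmt"
	"slices"
)

// device is a stand-in for state.Device from the file above.
type device struct {
	RequestNames []string
	CDIDeviceIDs []string
}

// cdiDeviceIDs mirrors the matching rule used by cdiDevicesAsList.
func cdiDeviceIDs(devices []device, requestName string) []string {
	var ids []string
	for _, d := range devices {
		if requestName == "" || len(d.RequestNames) == 0 || slices.Contains(d.RequestNames, requestName) {
			ids = append(ids, d.CDIDeviceIDs...)
		}
	}
	return ids
}

func main() {
	devices := []device{
		{CDIDeviceIDs: []string{"vendor.com/gpu=0"}}, // not tied to a request: always matches
		{RequestNames: []string{"req-1"}, CDIDeviceIDs: []string{"vendor.com/gpu=1"}}, // matches only "req-1"
	}
	key := "default" + "/" + "my-claim" // the claimInfoCache key format
	fmt.Println(key, cdiDeviceIDs(devices, "req-1"))
}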

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/manager.go generated vendored Normal file

@@ -0,0 +1,553 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dra
import (
"context"
"fmt"
"strconv"
"time"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/dynamic-resource-allocation/resourceclaim"
"k8s.io/klog/v2"
drapb "k8s.io/kubelet/pkg/apis/dra/v1beta1"
dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)
// draManagerStateFileName is the file name where dra manager stores its state
const draManagerStateFileName = "dra_manager_state"
// defaultReconcilePeriod is the default reconciliation period to keep all claim info state in sync.
const defaultReconcilePeriod = 60 * time.Second
// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod
// GetNodeFunc is a function that returns the node object using the kubelet's node lister.
type GetNodeFunc func() (*v1.Node, error)
// ManagerImpl is the structure in charge of managing DRA drivers.
type ManagerImpl struct {
// cache contains cached claim info
cache *claimInfoCache
// reconcilePeriod is the duration between calls to reconcileLoop.
reconcilePeriod time.Duration
// activePods is a method for listing active pods on the node
// so all claim info state can be updated in the reconciliation loop.
activePods ActivePodsFunc
// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
// We use it to determine when we can treat pods as inactive and react appropriately.
sourcesReady config.SourcesReady
// KubeClient reference
kubeClient clientset.Interface
// getNode is a function that returns the node object using the kubelet's node lister.
getNode GetNodeFunc
}
// NewManagerImpl creates a new manager.
func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, nodeName types.NodeName) (*ManagerImpl, error) {
claimInfoCache, err := newClaimInfoCache(stateFileDirectory, draManagerStateFileName)
if err != nil {
return nil, fmt.Errorf("failed to create claimInfo cache: %w", err)
}
// TODO: for now the reconcile period is not configurable.
// We should consider making it configurable in the future.
reconcilePeriod := defaultReconcilePeriod
manager := &ManagerImpl{
cache: claimInfoCache,
kubeClient: kubeClient,
reconcilePeriod: reconcilePeriod,
activePods: nil,
sourcesReady: nil,
}
return manager, nil
}
func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
return cache.PluginHandler(dra.NewRegistrationHandler(m.kubeClient, m.getNode))
}
// Start starts the reconcile loop of the manager.
func (m *ManagerImpl) Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error {
m.activePods = activePods
m.getNode = getNode
m.sourcesReady = sourcesReady
go wait.UntilWithContext(ctx, func(ctx context.Context) { m.reconcileLoop(ctx) }, m.reconcilePeriod)
return nil
}
// reconcileLoop ensures that any stale state in the manager's claimInfoCache gets periodically reconciled.
func (m *ManagerImpl) reconcileLoop(ctx context.Context) {
logger := klog.FromContext(ctx)
// Only once all sources are ready do we attempt to reconcile.
// This ensures that the call to m.activePods() below will succeed with
// the actual active pods list.
if m.sourcesReady == nil || !m.sourcesReady.AllReady() {
return
}
// Get the full list of active pods.
activePods := sets.New[string]()
for _, p := range m.activePods() {
activePods.Insert(string(p.UID))
}
// Get the list of inactive pods still referenced by any claimInfos.
type podClaims struct {
uid types.UID
namespace string
claimNames []string
}
inactivePodClaims := make(map[string]*podClaims)
m.cache.RLock()
for _, claimInfo := range m.cache.claimInfo {
for podUID := range claimInfo.PodUIDs {
if activePods.Has(podUID) {
continue
}
if inactivePodClaims[podUID] == nil {
inactivePodClaims[podUID] = &podClaims{
uid: types.UID(podUID),
namespace: claimInfo.Namespace,
claimNames: []string{},
}
}
inactivePodClaims[podUID].claimNames = append(inactivePodClaims[podUID].claimNames, claimInfo.ClaimName)
}
}
m.cache.RUnlock()
// Loop through all inactive pods and call UnprepareResources on them.
for _, podClaims := range inactivePodClaims {
if err := m.unprepareResources(ctx, podClaims.uid, podClaims.namespace, podClaims.claimNames); err != nil {
logger.Info("Unpreparing pod resources in reconcile loop failed, will retry", "podUID", podClaims.uid, "err", err)
}
}
}
// PrepareResources attempts to prepare all of the required resources
// for the input container, issue NodePrepareResources rpc requests
// for each new resource requirement, process their responses and update the cached
// containerResources on success.
func (m *ManagerImpl) PrepareResources(ctx context.Context, pod *v1.Pod) error {
startTime := time.Now()
err := m.prepareResources(ctx, pod)
metrics.DRAOperationsDuration.WithLabelValues("PrepareResources", strconv.FormatBool(err == nil)).Observe(time.Since(startTime).Seconds())
return err
}
func (m *ManagerImpl) prepareResources(ctx context.Context, pod *v1.Pod) error {
logger := klog.FromContext(ctx)
batches := make(map[string][]*drapb.Claim)
resourceClaims := make(map[types.UID]*resourceapi.ResourceClaim)
for i := range pod.Spec.ResourceClaims {
podClaim := &pod.Spec.ResourceClaims[i]
logger.V(3).Info("Processing resource", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
claimName, mustCheckOwner, err := resourceclaim.Name(pod, podClaim)
if err != nil {
return fmt.Errorf("prepare resource claim: %w", err)
}
if claimName == nil {
// Nothing to do.
logger.V(5).Info("No need to prepare resources, no claim generated", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
continue
}
// Query claim object from the API server
resourceClaim, err := m.kubeClient.ResourceV1beta1().ResourceClaims(pod.Namespace).Get(
ctx,
*claimName,
metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to fetch ResourceClaim %s referenced by pod %s: %w", *claimName, pod.Name, err)
}
if mustCheckOwner {
if err = resourceclaim.IsForPod(pod, resourceClaim); err != nil {
return err
}
}
// Check if pod is in the ReservedFor for the claim
if !resourceclaim.IsReservedForPod(pod, resourceClaim) {
return fmt.Errorf("pod %s(%s) is not allowed to use resource claim %s(%s)",
pod.Name, pod.UID, *claimName, resourceClaim.UID)
}
// Atomically perform some operations on the claimInfo cache.
err = m.cache.withLock(func() error {
// Get a reference to the claim info for this claim from the cache.
// If there isn't one yet, then add it to the cache.
claimInfo, exists := m.cache.get(resourceClaim.Name, resourceClaim.Namespace)
if !exists {
ci, err := newClaimInfoFromClaim(resourceClaim)
if err != nil {
return fmt.Errorf("claim %s: %w", klog.KObj(resourceClaim), err)
}
claimInfo = m.cache.add(ci)
logger.V(6).Info("Created new claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
} else {
logger.V(6).Info("Found existing claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
}
// Add a reference to the current pod in the claim info.
claimInfo.addPodReference(pod.UID)
// Checkpoint to ensure all claims we plan to prepare are tracked.
// If something goes wrong and the newly referenced pod gets
// deleted without a successful prepare call, we will catch
// that in the reconcile loop and take the appropriate action.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
// If this claim is already prepared, there is no need to prepare it again.
if claimInfo.isPrepared() {
logger.V(5).Info("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
return nil
}
// This saved claim will be used to update the ClaimInfo cache
// after the NodePrepareResources gRPC call succeeds
resourceClaims[claimInfo.ClaimUID] = resourceClaim
// Loop through all drivers and prepare for calling NodePrepareResources.
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
UID: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
}
for driverName := range claimInfo.DriverState {
batches[driverName] = append(batches[driverName], claim)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}
// Call NodePrepareResources for all claims in each batch.
// If there is any error, processing gets aborted.
// We could try to continue, but that would make the code more complex.
for driverName, claims := range batches {
// Call NodePrepareResources RPC for all resource handles.
client, err := dra.NewDRAPluginClient(driverName)
if err != nil {
return fmt.Errorf("failed to get gRPC client for driver %s: %w", driverName, err)
}
response, err := client.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{Claims: claims})
if err != nil {
// General error unrelated to any particular claim.
return fmt.Errorf("NodePrepareResources failed: %w", err)
}
for claimUID, result := range response.Claims {
reqClaim := lookupClaimRequest(claims, claimUID)
if reqClaim == nil {
return fmt.Errorf("NodePrepareResources returned result for unknown claim UID %s", claimUID)
}
if result.GetError() != "" {
return fmt.Errorf("NodePrepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}
claim := resourceClaims[types.UID(claimUID)]
// Add the prepared CDI devices to the claim info
err := m.cache.withLock(func() error {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
for _, device := range result.GetDevices() {
info.addDevice(driverName, state.Device{PoolName: device.PoolName, DeviceName: device.DeviceName, RequestNames: device.RequestNames, CDIDeviceIDs: device.CDIDeviceIDs})
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}
unfinished := len(claims) - len(response.Claims)
if unfinished != 0 {
return fmt.Errorf("NodePrepareResources left out %d claims", unfinished)
}
}
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Mark all pod claims as prepared.
for _, claim := range resourceClaims {
info, exists := m.cache.get(claim.Name, claim.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
}
info.setPrepared()
}
// Checkpoint to ensure all prepared claims are tracked with their list
// of CDI devices attached.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
return nil
}
func lookupClaimRequest(claims []*drapb.Claim, claimUID string) *drapb.Claim {
for _, claim := range claims {
if claim.UID == claimUID {
return claim
}
}
return nil
}
// GetResources gets a ContainerInfo object from the claimInfo cache.
// This information is used by the caller to update a container config.
func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error) {
cdiDevices := []kubecontainer.CDIDevice{}
for i := range pod.Spec.ResourceClaims {
podClaim := &pod.Spec.ResourceClaims[i]
claimName, _, err := resourceclaim.Name(pod, podClaim)
if err != nil {
return nil, fmt.Errorf("list resource claims: %w", err)
}
// The claim name might be nil if no underlying resource claim
// was generated for the referenced claim. There are valid use
// cases when this might happen, so we simply skip it.
if claimName == nil {
continue
}
for _, claim := range container.Resources.Claims {
if podClaim.Name != claim.Name {
continue
}
err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}
// As of Kubernetes 1.31, CDI device IDs are not passed via annotations anymore.
cdiDevices = append(cdiDevices, claimInfo.cdiDevicesAsList(claim.Request)...)
return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
}
}
return &ContainerInfo{CDIDevices: cdiDevices}, nil
}
// UnprepareResources calls a driver's NodeUnprepareResource API for each resource claim owned by a pod.
// This function is idempotent and may be called multiple times against the same pod.
// As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
// already been successfully unprepared.
func (m *ManagerImpl) UnprepareResources(ctx context.Context, pod *v1.Pod) error {
var err error
defer func(startTime time.Time) {
metrics.DRAOperationsDuration.WithLabelValues("UnprepareResources", strconv.FormatBool(err != nil)).Observe(time.Since(startTime).Seconds())
}(time.Now())
var claimNames []string
for i := range pod.Spec.ResourceClaims {
claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
if err != nil {
return fmt.Errorf("unprepare resource claim: %w", err)
}
// The claim name might be nil if no underlying resource claim
// was generated for the referenced claim. There are valid use
// cases when this might happen, so we simply skip it.
if claimName == nil {
continue
}
claimNames = append(claimNames, *claimName)
}
err = m.unprepareResources(ctx, pod.UID, pod.Namespace, claimNames)
return err
}
func (m *ManagerImpl) unprepareResources(ctx context.Context, podUID types.UID, namespace string, claimNames []string) error {
logger := klog.FromContext(ctx)
batches := make(map[string][]*drapb.Claim)
claimNamesMap := make(map[types.UID]string)
for _, claimName := range claimNames {
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Get the claim info from the cache
claimInfo, exists := m.cache.get(claimName, namespace)
// Skip calling NodeUnprepareResource if claim info is not cached
if !exists {
return nil
}
// Skip calling NodeUnprepareResource if other pods are still referencing it
if len(claimInfo.PodUIDs) > 1 {
// We delay checkpointing of this change until
// UnprepareResources returns successfully. It is OK to do
// this because we will only return successfully from this call
// if the checkpoint has succeeded. That means if the kubelet
// is ever restarted before this checkpoint succeeds, we will
// simply call into this (idempotent) function again.
claimInfo.deletePodReference(podUID)
return nil
}
// This claimInfo name will be used to update the ClaimInfo cache
// after the NodeUnprepareResources gRPC call succeeds
claimNamesMap[claimInfo.ClaimUID] = claimInfo.ClaimName
// Loop through all drivers and prepare for calling NodeUnprepareResources.
claim := &drapb.Claim{
Namespace: claimInfo.Namespace,
UID: string(claimInfo.ClaimUID),
Name: claimInfo.ClaimName,
}
for driverName := range claimInfo.DriverState {
batches[driverName] = append(batches[driverName], claim)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
}
// Call NodeUnprepareResources for all claims in each batch.
// If there is any error, processing gets aborted.
// We could try to continue, but that would make the code more complex.
for driverName, claims := range batches {
// Call NodeUnprepareResources RPC for all resource handles.
client, err := dra.NewDRAPluginClient(driverName)
if err != nil {
return fmt.Errorf("get gRPC client for DRA driver %s: %w", driverName, err)
}
response, err := client.NodeUnprepareResources(ctx, &drapb.NodeUnprepareResourcesRequest{Claims: claims})
if err != nil {
// General error unrelated to any particular claim.
return fmt.Errorf("NodeUnprepareResources failed: %w", err)
}
for claimUID, result := range response.Claims {
reqClaim := lookupClaimRequest(claims, claimUID)
if reqClaim == nil {
return fmt.Errorf("NodeUnprepareResources returned result for unknown claim UID %s", claimUID)
}
if result.GetError() != "" {
return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
}
}
unfinished := len(claims) - len(response.Claims)
if unfinished != 0 {
return fmt.Errorf("NodeUnprepareResources left out %d claims", unfinished)
}
}
// Atomically perform some operations on the claimInfo cache.
err := m.cache.withLock(func() error {
// Delete all claimInfos from the cache that have just been unprepared.
for _, claimName := range claimNamesMap {
claimInfo, _ := m.cache.get(claimName, namespace)
m.cache.delete(claimName, namespace)
logger.V(6).Info("Deleted claim info cache entry", "claim", klog.KRef(namespace, claimName), "claimInfoEntry", claimInfo)
}
// Atomically sync the cache back to the checkpoint.
if err := m.cache.syncToCheckpoint(); err != nil {
return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("locked cache operation: %w", err)
}
return nil
}
// PodMightNeedToUnprepareResources returns true if the pod might need to
// unprepare resources
func (m *ManagerImpl) PodMightNeedToUnprepareResources(uid types.UID) bool {
m.cache.Lock()
defer m.cache.Unlock()
return m.cache.hasPodReference(uid)
}
// GetContainerClaimInfos gets Container's ClaimInfo
func (m *ManagerImpl) GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error) {
claimInfos := make([]*ClaimInfo, 0, len(pod.Spec.ResourceClaims))
for i, podResourceClaim := range pod.Spec.ResourceClaims {
claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
if err != nil {
return nil, fmt.Errorf("determine resource claim information: %w", err)
}
for _, claim := range container.Resources.Claims {
if podResourceClaim.Name != claim.Name {
continue
}
err := m.cache.withRLock(func() error {
claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
if !exists {
return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
}
claimInfos = append(claimInfos, claimInfo.DeepCopy())
return nil
})
if err != nil {
return nil, fmt.Errorf("locked cache operation: %w", err)
}
}
}
return claimInfos, nil
}
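
Start hands reconcileLoop to wait.UntilWithContext, which invokes the function once per period until the context is canceled. A minimal sketch of that wiring, with a stand-in body instead of the real reconcileLoop:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	// Cancel after a few ticks so the example terminates.
	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
	defer cancel()
	reconcile := func(ctx context.Context) {
		// Stand-in for reconcileLoop: find claims referenced only by
		// inactive pods and unprepare them.
		fmt.Println("reconcile tick")
	}
	// Blocks until ctx is done, calling reconcile every 100ms.
	wait.UntilWithContext(ctx, reconcile, 100*time.Millisecond)
}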


@@ -0,0 +1,181 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import (
"context"
"errors"
"fmt"
"net"
"sync"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/status"
"k8s.io/klog/v2"
drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
// NewDRAPluginClient returns a wrapper around those gRPC methods of a DRA
// driver kubelet plugin which need to be called by kubelet. The wrapper
// handles gRPC connection management and logging. Connections are reused
// across different NewDRAPluginClient calls.
func NewDRAPluginClient(pluginName string) (*Plugin, error) {
if pluginName == "" {
return nil, fmt.Errorf("plugin name is empty")
}
existingPlugin := draPlugins.get(pluginName)
if existingPlugin == nil {
return nil, fmt.Errorf("plugin name %s not found in the list of registered DRA plugins", pluginName)
}
return existingPlugin, nil
}
type Plugin struct {
name string
backgroundCtx context.Context
cancel func(cause error)
mutex sync.Mutex
conn *grpc.ClientConn
endpoint string
chosenService string // e.g. drapbv1beta1.DRAPluginService
clientCallTimeout time.Duration
}
func (p *Plugin) getOrCreateGRPCConn() (*grpc.ClientConn, error) {
p.mutex.Lock()
defer p.mutex.Unlock()
if p.conn != nil {
return p.conn, nil
}
ctx := p.backgroundCtx
logger := klog.FromContext(ctx)
network := "unix"
logger.V(4).Info("Creating new gRPC connection", "protocol", network, "endpoint", p.endpoint)
// grpc.Dial is deprecated. grpc.NewClient should be used instead.
// For now this gets ignored because this function is meant to establish
// the connection, with the one second timeout below. Perhaps that
// approach should be reconsidered?
//nolint:staticcheck
conn, err := grpc.Dial(
p.endpoint,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithContextDialer(func(ctx context.Context, target string) (net.Conn, error) {
return (&net.Dialer{}).DialContext(ctx, network, target)
}),
grpc.WithChainUnaryInterceptor(newMetricsInterceptor(p.name)),
)
if err != nil {
return nil, err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if ok := conn.WaitForStateChange(ctx, connectivity.Connecting); !ok {
return nil, errors.New("timed out waiting for gRPC connection to be ready")
}
p.conn = conn
return p.conn, nil
}
func (p *Plugin) NodePrepareResources(
ctx context.Context,
req *drapbv1beta1.NodePrepareResourcesRequest,
opts ...grpc.CallOption,
) (*drapbv1beta1.NodePrepareResourcesResponse, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info("Calling NodePrepareResources rpc", "request", req)
conn, err := p.getOrCreateGRPCConn()
if err != nil {
return nil, err
}
ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
defer cancel()
var response *drapbv1beta1.NodePrepareResourcesResponse
switch p.chosenService {
case drapbv1beta1.DRAPluginService:
nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
response, err = nodeClient.NodePrepareResources(ctx, req)
case drapbv1alpha4.NodeService:
nodeClient := drapbv1alpha4.NewNodeClient(conn)
response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodePrepareResources(ctx, req)
default:
// Shouldn't happen, validateSupportedServices should only
// return services we support here.
return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
}
logger.V(4).Info("Done calling NodePrepareResources rpc", "response", response, "err", err)
return response, err
}
func (p *Plugin) NodeUnprepareResources(
ctx context.Context,
req *drapbv1beta1.NodeUnprepareResourcesRequest,
opts ...grpc.CallOption,
) (*drapbv1beta1.NodeUnprepareResourcesResponse, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info("Calling NodeUnprepareResource rpc", "request", req)
conn, err := p.getOrCreateGRPCConn()
if err != nil {
return nil, err
}
ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
defer cancel()
var response *drapbv1beta1.NodeUnprepareResourcesResponse
switch p.chosenService {
case drapbv1beta1.DRAPluginService:
nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
response, err = nodeClient.NodeUnprepareResources(ctx, req)
case drapbv1alpha4.NodeService:
nodeClient := drapbv1alpha4.NewNodeClient(conn)
response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodeUnprepareResources(ctx, req)
default:
// Shouldn't happen, validateSupportedServices should only
// return services we support here.
return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
}
logger.V(4).Info("Done calling NodeUnprepareResources rpc", "response", response, "err", err)
return response, err
}
func newMetricsInterceptor(pluginName string) grpc.UnaryClientInterceptor {
return func(ctx context.Context, method string, req, reply any, conn *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
start := time.Now()
err := invoker(ctx, method, req, reply, conn, opts...)
metrics.DRAGRPCOperationsDuration.WithLabelValues(pluginName, method, status.Code(err).String()).Observe(time.Since(start).Seconds())
return err
}
}
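
newMetricsInterceptor is a standard gRPC unary client interceptor: it times each call and records the resulting status code. A hedged, self-contained sketch of the same pattern, logging instead of updating the kubelet histogram and using grpc.NewClient (the non-deprecated constructor the comment above alludes to); the Unix socket path is made up for illustration:

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/status"
)

// timingInterceptor measures each unary call and logs its duration and status code.
func timingInterceptor(plugin string) grpc.UnaryClientInterceptor {
	return func(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		start := time.Now()
		err := invoker(ctx, method, req, reply, cc, opts...)
		log.Printf("plugin=%s method=%s code=%s duration=%s", plugin, method, status.Code(err), time.Since(start))
		return err
	}
}

func main() {
	// grpc-go resolves the "unix" scheme natively; the endpoint is hypothetical.
	conn, err := grpc.NewClient("unix:///var/run/example-driver.sock",
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithChainUnaryInterceptor(timingInterceptor("example-driver")),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	// Generated stubs (e.g. drapbv1beta1.NewDRAPluginClient(conn)) would be used here.
}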


@@ -0,0 +1,79 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import (
"errors"
"sync"
)
// PluginsStore holds a list of DRA Plugins.
type pluginsStore struct {
sync.RWMutex
store map[string]*Plugin
}
// draPlugins map keeps track of all registered DRA plugins on the node
// and their corresponding sockets.
var draPlugins = &pluginsStore{}
// get lets you retrieve a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) get(pluginName string) *Plugin {
s.RLock()
defer s.RUnlock()
return s.store[pluginName]
}
// add lets you save a DRA Plugin to the list under its name, returning
// the plugin it replaced, if any. This method is protected by a mutex.
func (s *pluginsStore) add(p *Plugin) (replacedPlugin *Plugin, replaced bool) {
s.Lock()
defer s.Unlock()
if s.store == nil {
s.store = make(map[string]*Plugin)
}
replacedPlugin, exists := s.store[p.name]
s.store[p.name] = p
if replacedPlugin != nil && replacedPlugin.cancel != nil {
replacedPlugin.cancel(errors.New("plugin got replaced"))
}
return replacedPlugin, exists
}
// delete lets you delete a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) delete(pluginName string) *Plugin {
s.Lock()
defer s.Unlock()
p, exists := s.store[pluginName]
if !exists {
return nil
}
if p.cancel != nil {
p.cancel(errors.New("plugin got removed"))
}
delete(s.store, pluginName)
return p
}
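
add has replace semantics: registering a plugin under an existing name swaps the map entry and cancels the old plugin's background context so its activities wind down. A self-contained miniature of that pattern (entry is an illustrative stand-in for Plugin):

package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
)

type entry struct {
	name   string
	cancel context.CancelCauseFunc
}

type registry struct {
	sync.RWMutex
	m map[string]*entry
}

// add stores e under its name, canceling any entry it replaces.
func (r *registry) add(e *entry) (replaced bool) {
	r.Lock()
	defer r.Unlock()
	if r.m == nil {
		r.m = make(map[string]*entry)
	}
	old := r.m[e.name]
	r.m[e.name] = e
	if old != nil && old.cancel != nil {
		old.cancel(errors.New("plugin got replaced"))
	}
	return old != nil
}

func main() {
	_, cancel := context.WithCancelCause(context.Background())
	r := &registry{}
	fmt.Println(r.add(&entry{name: "driver-a", cancel: cancel})) // false: first registration
	fmt.Println(r.add(&entry{name: "driver-a"}))                 // true: old context canceled
}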


@@ -0,0 +1,249 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import (
"context"
"errors"
"fmt"
"slices"
"time"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)
// defaultClientCallTimeout is the default amount of time that a DRA driver has
// to respond to any of the gRPC calls. kubelet uses this value by passing nil
// to RegisterPlugin. Some tests use a different, usually shorter timeout to
// speed up testing.
//
// This is half of the kubelet retry period (according to
// https://github.com/kubernetes/kubernetes/commit/0449cef8fd5217d394c5cd331d852bd50983e6b3).
const defaultClientCallTimeout = 45 * time.Second
// RegistrationHandler is the handler which is fed to the pluginwatcher API.
type RegistrationHandler struct {
// backgroundCtx is used for all future activities of the handler.
// This is necessary because it implements APIs which don't
// provide a context.
backgroundCtx context.Context
kubeClient kubernetes.Interface
getNode func() (*v1.Node, error)
}
var _ cache.PluginHandler = &RegistrationHandler{}
// NewPluginHandler returns new registration handler.
//
// Must only be called once per process because it manages global state.
// If a kubeClient is provided, then it synchronizes ResourceSlices
// with the resource information provided by plugins.
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
handler := &RegistrationHandler{
// The context and thus logger should come from the caller.
backgroundCtx: klog.NewContext(context.TODO(), klog.LoggerWithName(klog.TODO(), "DRA registration handler")),
kubeClient: kubeClient,
getNode: getNode,
}
// When kubelet starts up, no DRA driver has registered yet. None of
// the drivers are usable until they come back, which might not happen
// at all. Therefore it is better to not advertise any local resources
// because pods could get stuck on the node waiting for the driver
// to start up.
//
// This has to run in the background.
go handler.wipeResourceSlices("")
return handler
}
// wipeResourceSlices deletes ResourceSlices of the node, optionally just for a specific driver.
func (h *RegistrationHandler) wipeResourceSlices(driver string) {
if h.kubeClient == nil {
return
}
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)
backoff := wait.Backoff{
Duration: time.Second,
Factor: 2,
Jitter: 0.2,
Cap: 5 * time.Minute,
Steps: 100,
}
// Error logging is done inside the loop. Context cancellation doesn't get logged.
_ = wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) {
node, err := h.getNode()
if apierrors.IsNotFound(err) {
return false, nil
}
if err != nil {
logger.Error(err, "Unexpected error checking for node")
return false, nil
}
fieldSelector := fields.Set{resourceapi.ResourceSliceSelectorNodeName: node.Name}
if driver != "" {
fieldSelector[resourceapi.ResourceSliceSelectorDriver] = driver
}
err = h.kubeClient.ResourceV1beta1().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: fieldSelector.String()})
switch {
case err == nil:
logger.V(3).Info("Deleted ResourceSlices", "fieldSelector", fieldSelector)
return true, nil
case apierrors.IsUnauthorized(err):
// This can happen while kubelet is still figuring out
// its credentials.
logger.V(5).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
return false, nil
default:
// Log and retry for other errors.
logger.V(3).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
return false, nil
}
})
}
// RegisterPlugin is called when a plugin can be registered.
//
// DRA uses the version array in the registration API to enumerate all gRPC
// services that the plugin provides, using the "<gRPC package name>.<service
// name>" format (e.g. "v1beta1.DRAPlugin"). This allows kubelet to determine
// in advance which version to use and which optional services the plugin
// supports.
func (h *RegistrationHandler) RegisterPlugin(pluginName string, endpoint string, supportedServices []string, pluginClientTimeout *time.Duration) error {
// Prepare a context with its own logger for the plugin.
//
// The lifecycle of the plugin's background activities is tied to our
// root context, so canceling that will also cancel the plugin.
//
// The logger injects the plugin name as additional value
// into all log output related to the plugin.
ctx := h.backgroundCtx
logger := klog.FromContext(ctx)
logger = klog.LoggerWithValues(logger, "pluginName", pluginName)
ctx = klog.NewContext(ctx, logger)
logger.V(3).Info("Register new DRA plugin", "endpoint", endpoint)
chosenService, err := h.validateSupportedServices(pluginName, supportedServices)
if err != nil {
return fmt.Errorf("version check of plugin %s failed: %w", pluginName, err)
}
var timeout time.Duration
if pluginClientTimeout == nil {
timeout = defaultClientCallTimeout
} else {
timeout = *pluginClientTimeout
}
ctx, cancel := context.WithCancelCause(ctx)
pluginInstance := &Plugin{
name: pluginName,
backgroundCtx: ctx,
cancel: cancel,
conn: nil,
endpoint: endpoint,
chosenService: chosenService,
clientCallTimeout: timeout,
}
// Store the endpoint of the newly registered DRA plugin in the map, keyed by
// plugin name, so that all other DRA components can look up the plugin's
// socket by its name.
if oldPlugin, replaced := draPlugins.add(pluginInstance); replaced {
logger.V(1).Info("DRA plugin already registered, the old plugin was replaced and will be forgotten by the kubelet till the next kubelet restart", "oldEndpoint", oldPlugin.endpoint)
}
return nil
}
// validateSupportedServices identifies the highest supported gRPC service for
// NodePrepareResources and NodeUnprepareResources and returns its name
// (e.g. [drapbv1beta1.DRAPluginService]). An error is returned if the plugin
// is unusable.
func (h *RegistrationHandler) validateSupportedServices(pluginName string, supportedServices []string) (string, error) {
if len(supportedServices) == 0 {
return "", errors.New("empty list of supported gRPC services (aka supported versions)")
}
// Pick most recent version if available.
chosenService := ""
for _, service := range []string{
// Sorted by most recent first, oldest last.
drapbv1beta1.DRAPluginService,
drapbv1alpha4.NodeService,
} {
if slices.Contains(supportedServices, service) {
chosenService = service
break
}
}
// Fall back to alpha if necessary because
// plugins at that time didn't advertise gRPC services.
if chosenService == "" {
chosenService = drapbv1alpha4.NodeService
}
return chosenService, nil
}
// DeRegisterPlugin is called when a plugin has removed its socket,
// signaling it is no longer available.
func (h *RegistrationHandler) DeRegisterPlugin(pluginName string) {
if p := draPlugins.delete(pluginName); p != nil {
logger := klog.FromContext(p.backgroundCtx)
logger.V(3).Info("Deregister DRA plugin", "endpoint", p.endpoint)
// Clean up the ResourceSlices for the deleted Plugin since it
// may have died without doing so itself and might never come
// back.
go h.wipeResourceSlices(pluginName)
return
}
logger := klog.FromContext(h.backgroundCtx)
logger.V(3).Info("Deregister DRA plugin not necessary, was already removed")
}
// ValidatePlugin is called by kubelet's plugin watcher upon detection
// of a new registration socket opened by DRA plugin.
func (h *RegistrationHandler) ValidatePlugin(pluginName string, endpoint string, supportedServices []string) error {
_, err := h.validateSupportedServices(pluginName, supportedServices)
if err != nil {
return fmt.Errorf("invalid versions of plugin %s: %w", pluginName, err)
}
return err
}
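
wipeResourceSlices retries its DeleteCollection call under a capped exponential backoff. A minimal sketch of wait.ExponentialBackoffWithContext with the same Backoff shape, using a stand-in condition that succeeds on the third attempt:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	backoff := wait.Backoff{
		Duration: 10 * time.Millisecond, // first delay
		Factor:   2,                     // double each step
		Jitter:   0.2,
		Cap:      time.Second, // never wait longer than this
		Steps:    100,
	}
	attempt := 0
	err := wait.ExponentialBackoffWithContext(context.Background(), backoff, func(ctx context.Context) (bool, error) {
		attempt++
		fmt.Println("attempt", attempt)
		return attempt >= 3, nil // done=true stops the retries
	})
	fmt.Println("err:", err) // <nil> once the condition reported done
}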


@@ -0,0 +1,107 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"encoding/json"
"hash/crc32"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)
const (
CheckpointAPIGroup = "checkpoint.dra.kubelet.k8s.io"
CheckpointKind = "DRACheckpoint"
CheckpointAPIVersion = CheckpointAPIGroup + "/v1"
)
// Checkpoint represents a structure to store DRA checkpoint data
type Checkpoint struct {
// Data is a JSON serialized checkpoint data
Data string
// Checksum is a checksum of Data
Checksum uint32
}
type CheckpointData struct {
metav1.TypeMeta
ClaimInfoStateList ClaimInfoStateList
}
// NewCheckpoint creates a new checkpoint from a list of claim info states
func NewCheckpoint(data ClaimInfoStateList) (*Checkpoint, error) {
cpData := &CheckpointData{
TypeMeta: metav1.TypeMeta{
Kind: CheckpointKind,
APIVersion: CheckpointAPIVersion,
},
ClaimInfoStateList: data,
}
cpDataBytes, err := json.Marshal(cpData)
if err != nil {
return nil, err
}
cp := &Checkpoint{
Data: string(cpDataBytes),
Checksum: crc32.ChecksumIEEE(cpDataBytes),
}
return cp, nil
}
// MarshalCheckpoint marshals checkpoint to JSON
func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) {
return json.Marshal(cp)
}
// UnmarshalCheckpoint unmarshals checkpoint from JSON
// and verifies its data checksum
func (cp *Checkpoint) UnmarshalCheckpoint(blob []byte) error {
if err := json.Unmarshal(blob, cp); err != nil {
return err
}
// verify checksum
if err := cp.VerifyChecksum(); err != nil {
return err
}
return nil
}
// VerifyChecksum verifies that current checksum
// of checkpointed Data is valid
func (cp *Checkpoint) VerifyChecksum() error {
expectedCS := crc32.ChecksumIEEE([]byte(cp.Data))
if expectedCS != cp.Checksum {
return &errors.CorruptCheckpointError{ActualCS: uint64(cp.Checksum), ExpectedCS: uint64(expectedCS)}
}
return nil
}
// GetClaimInfoStateList returns list of claim info states from checkpoint
func (cp *Checkpoint) GetClaimInfoStateList() (ClaimInfoStateList, error) {
var data CheckpointData
if err := json.Unmarshal([]byte(cp.Data), &data); err != nil {
return nil, err
}
return data.ClaimInfoStateList, nil
}
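
The checkpoint format stores the JSON-serialized payload next to a CRC-32 (IEEE) checksum of those bytes; verification recomputes the checksum over Data and compares. A self-contained illustration of the scheme:

package main

import (
	"encoding/json"
	"fmt"
	"hash/crc32"
)

type checkpoint struct {
	Data     string
	Checksum uint32
}

// newCheckpoint serializes the payload and checksums the resulting bytes.
func newCheckpoint(payload any) (*checkpoint, error) {
	b, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}
	return &checkpoint{Data: string(b), Checksum: crc32.ChecksumIEEE(b)}, nil
}

// verify recomputes the checksum over the stored Data.
func (cp *checkpoint) verify() error {
	if got := crc32.ChecksumIEEE([]byte(cp.Data)); got != cp.Checksum {
		return fmt.Errorf("corrupt checkpoint: got checksum %d, want %d", got, cp.Checksum)
	}
	return nil
}

func main() {
	cp, _ := newCheckpoint(map[string]string{"claim": "my-claim"})
	fmt.Println(cp.verify()) // <nil>
	cp.Data += "tampered"
	fmt.Println(cp.verify()) // corrupt checkpoint error
}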


@@ -0,0 +1,98 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"errors"
"fmt"
"sync"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)
type Checkpointer interface {
GetOrCreate() (*Checkpoint, error)
Store(*Checkpoint) error
}
type checkpointer struct {
sync.RWMutex
checkpointManager checkpointmanager.CheckpointManager
checkpointName string
}
// NewCheckpointer creates a new checkpointer for keeping track of claim info with a checkpoint backend
func NewCheckpointer(stateDir, checkpointName string) (Checkpointer, error) {
if len(checkpointName) == 0 {
return nil, fmt.Errorf("received empty string instead of checkpointName")
}
checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
if err != nil {
return nil, fmt.Errorf("failed to initialize checkpoint manager: %w", err)
}
checkpointer := &checkpointer{
checkpointManager: checkpointManager,
checkpointName: checkpointName,
}
return checkpointer, nil
}
// GetOrCreate gets the list of claim info states from a checkpoint,
// or creates an empty list if the checkpoint doesn't exist
func (sc *checkpointer) GetOrCreate() (*Checkpoint, error) {
sc.Lock()
defer sc.Unlock()
checkpoint, err := NewCheckpoint(nil)
if err != nil {
return nil, fmt.Errorf("failed to create new checkpoint: %w", err)
}
err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) {
err = sc.store(checkpoint)
if err != nil {
return nil, fmt.Errorf("failed to store checkpoint %v: %w", sc.checkpointName, err)
}
return checkpoint, nil
}
if err != nil {
return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
}
return checkpoint, nil
}
// Store stores checkpoint to the file
func (sc *checkpointer) Store(checkpoint *Checkpoint) error {
sc.Lock()
defer sc.Unlock()
return sc.store(checkpoint)
}
// store saves state to a checkpoint; the caller is responsible for locking
func (sc *checkpointer) store(checkpoint *Checkpoint) error {
if err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint); err != nil {
return fmt.Errorf("could not save checkpoint %s: %w", sc.checkpointName, err)
}
return nil
}


@@ -0,0 +1,59 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
)
type ClaimInfoStateList []ClaimInfoState
// +k8s:deepcopy-gen=true
type ClaimInfoState struct {
// ClaimUID is the UID of a resource claim
ClaimUID types.UID
// ClaimName is the name of a resource claim
ClaimName string
// Namespace is a claim namespace
Namespace string
// PodUIDs is a set of pod UIDs that reference a resource
PodUIDs sets.Set[string]
// DriverState contains information about all drivers which have allocation
// results in the claim, even if they don't provide devices for their results.
DriverState map[string]DriverState
}
// DriverState is used to store per-device claim info state in a checkpoint
// +k8s:deepcopy-gen=true
type DriverState struct {
Devices []Device
}
// Device is how a DRA driver describes an allocated device in a claim
// to kubelet. RequestNames and CDI device IDs are optional.
// +k8s:deepcopy-gen=true
type Device struct {
PoolName string
DeviceName string
RequestNames []string
CDIDeviceIDs []string
}
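
ClaimInfoState gets generated deep-copy methods because PodUIDs and DriverState are map-backed: a plain struct copy would share those maps between the cache and its callers. A short demonstration of the pitfall, using the same sets package as the code above (the struct is a trimmed stand-in):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

type claimInfoState struct {
	ClaimName string
	PodUIDs   sets.Set[string]
}

func main() {
	orig := claimInfoState{ClaimName: "my-claim", PodUIDs: sets.New[string]("pod-a")}

	shallow := orig // copies the struct, but PodUIDs still shares the same map
	shallow.PodUIDs.Insert("pod-b")
	fmt.Println(orig.PodUIDs.Has("pod-b")) // true: the "copy" mutated the original

	deep := claimInfoState{ClaimName: orig.ClaimName, PodUIDs: orig.PodUIDs.Clone()}
	deep.PodUIDs.Insert("pod-c")
	fmt.Println(orig.PodUIDs.Has("pod-c")) // false: Clone produced an independent set
}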


@@ -0,0 +1,105 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package state
import (
sets "k8s.io/apimachinery/pkg/util/sets"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfoState) DeepCopyInto(out *ClaimInfoState) {
*out = *in
if in.PodUIDs != nil {
in, out := &in.PodUIDs, &out.PodUIDs
*out = make(sets.Set[string], len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.DriverState != nil {
in, out := &in.DriverState, &out.DriverState
*out = make(map[string]DriverState, len(*in))
for key, val := range *in {
(*out)[key] = *val.DeepCopy()
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfoState.
func (in *ClaimInfoState) DeepCopy() *ClaimInfoState {
if in == nil {
return nil
}
out := new(ClaimInfoState)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Device) DeepCopyInto(out *Device) {
*out = *in
if in.RequestNames != nil {
in, out := &in.RequestNames, &out.RequestNames
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.CDIDeviceIDs != nil {
in, out := &in.CDIDeviceIDs, &out.CDIDeviceIDs
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Device.
func (in *Device) DeepCopy() *Device {
if in == nil {
return nil
}
out := new(Device)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DriverState) DeepCopyInto(out *DriverState) {
*out = *in
if in.Devices != nil {
in, out := &in.Devices, &out.Devices
*out = make([]Device, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverState.
func (in *DriverState) DeepCopy() *DriverState {
if in == nil {
return nil
}
out := new(DriverState)
in.DeepCopyInto(out)
return out
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/types.go generated vendored Normal file

@@ -0,0 +1,61 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dra
import (
"context"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)
// Manager manages all the DRA resource plugins running on a node.
type Manager interface {
// GetWatcherHandler returns the plugin handler for the DRA.
GetWatcherHandler() cache.PluginHandler
// Start starts the reconcile loop of the manager.
// This will ensure that all claims are unprepared even if pods get deleted unexpectedly.
Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error
// PrepareResources prepares resources for a pod.
// It communicates with the DRA resource plugin to prepare resources.
PrepareResources(ctx context.Context, pod *v1.Pod) error
// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
UnprepareResources(ctx context.Context, pod *v1.Pod) error
// GetResources gets a ContainerInfo object from the claimInfo cache.
// This information is used by the caller to update a container config.
GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error)
// PodMightNeedToUnprepareResources returns true if the pod with the given UID
// might need to unprepare resources.
PodMightNeedToUnprepareResources(UID types.UID) bool
// GetContainerClaimInfos gets Container ClaimInfo objects
GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error)
}
// ContainerInfo contains information required by the runtime to consume prepared resources.
type ContainerInfo struct {
// CDI Devices for the container
CDIDevices []kubecontainer.CDIDevice
}


@@ -0,0 +1,39 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package dra
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfo) DeepCopyInto(out *ClaimInfo) {
*out = *in
in.ClaimInfoState.DeepCopyInto(&out.ClaimInfoState)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfo.
func (in *ClaimInfo) DeepCopy() *ClaimInfo {
if in == nil {
return nil
}
out := new(ClaimInfo)
in.DeepCopyInto(out)
return out
}