Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-14 18:53:35 +00:00)
rebase: update K8s packages to v0.32.1
Update K8s packages in go.mod to v0.32.1

Signed-off-by: Praveen M <m.praveen@ibm.com>
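The go.mod diff itself is not part of the vendored excerpt below. As a rough, illustrative sketch only (module names and versions inferred from the commit message, not copied from the actual diff), a bump like this pins the k8s.io staging modules to the matching tag, with k8s.io/kubernetes itself vendored at the corresponding v1.32.1 release:

    require (
        k8s.io/api v0.32.1
        k8s.io/apimachinery v0.32.1
        k8s.io/client-go v0.32.1
        k8s.io/kubelet v0.32.1
        k8s.io/kubernetes v1.32.1
    )

Running go mod tidy and go mod vendor afterwards regenerates the vendor/ tree shown in this commit.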
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/OWNERS (2 lines, generated, vendored, new file)
@@ -0,0 +1,2 @@
labels:
  - wg/device-management

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/claiminfo.go (222 lines, generated, vendored, new file)
@@ -0,0 +1,222 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
    "errors"
    "fmt"
    "slices"
    "sync"

    resourceapi "k8s.io/api/resource/v1beta1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/sets"
    "k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
    kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// ClaimInfo holds information required
// to prepare and unprepare a resource claim.
// +k8s:deepcopy-gen=true
type ClaimInfo struct {
    state.ClaimInfoState
    prepared bool
}

// claimInfoCache is a cache of processed resource claims keyed by namespace/claimname.
type claimInfoCache struct {
    sync.RWMutex
    checkpointer state.Checkpointer
    claimInfo    map[string]*ClaimInfo
}

// newClaimInfoFromClaim creates a new claim info from a resource claim.
// It verifies that the kubelet can handle the claim.
func newClaimInfoFromClaim(claim *resourceapi.ResourceClaim) (*ClaimInfo, error) {
    claimInfoState := state.ClaimInfoState{
        ClaimUID:    claim.UID,
        ClaimName:   claim.Name,
        Namespace:   claim.Namespace,
        PodUIDs:     sets.New[string](),
        DriverState: make(map[string]state.DriverState),
    }
    if claim.Status.Allocation == nil {
        return nil, errors.New("not allocated")
    }
    for _, result := range claim.Status.Allocation.Devices.Results {
        claimInfoState.DriverState[result.Driver] = state.DriverState{}
    }
    info := &ClaimInfo{
        ClaimInfoState: claimInfoState,
        prepared:       false,
    }
    return info, nil
}

// newClaimInfoFromState creates a new claim info from a checkpointed claim info state object.
func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
    info := &ClaimInfo{
        ClaimInfoState: *state.DeepCopy(),
        prepared:       false,
    }
    return info
}

// addDevice adds a set of CDI devices to the claim info.
func (info *ClaimInfo) addDevice(driverName string, deviceState state.Device) {
    if info.DriverState == nil {
        info.DriverState = make(map[string]state.DriverState)
    }
    driverState := info.DriverState[driverName]
    driverState.Devices = append(driverState.Devices, deviceState)
    info.DriverState[driverName] = driverState
}

// addPodReference adds a pod reference to the claim info.
func (info *ClaimInfo) addPodReference(podUID types.UID) {
    info.PodUIDs.Insert(string(podUID))
}

// hasPodReference checks if a pod reference exists in the claim info.
func (info *ClaimInfo) hasPodReference(podUID types.UID) bool {
    return info.PodUIDs.Has(string(podUID))
}

// deletePodReference deletes a pod reference from the claim info.
func (info *ClaimInfo) deletePodReference(podUID types.UID) {
    info.PodUIDs.Delete(string(podUID))
}

// setPrepared marks the claim info as prepared.
func (info *ClaimInfo) setPrepared() {
    info.prepared = true
}

// isPrepared checks if claim info is prepared or not.
func (info *ClaimInfo) isPrepared() bool {
    return info.prepared
}

// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
    checkpointer, err := state.NewCheckpointer(stateDir, checkpointName)
    if err != nil {
        return nil, fmt.Errorf("could not initialize checkpoint manager, please drain node and remove dra state file, err: %w", err)
    }

    checkpoint, err := checkpointer.GetOrCreate()
    if err != nil {
        return nil, fmt.Errorf("error calling GetOrCreate() on checkpoint state: %w", err)
    }

    cache := &claimInfoCache{
        checkpointer: checkpointer,
        claimInfo:    make(map[string]*ClaimInfo),
    }

    entries, err := checkpoint.GetClaimInfoStateList()
    if err != nil {
        return nil, fmt.Errorf("error calling GetEntries() on checkpoint: %w", err)
    }
    for _, entry := range entries {
        info := newClaimInfoFromState(&entry)
        cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
    }

    return cache, nil
}

// withLock runs a function while holding the claimInfoCache lock.
func (cache *claimInfoCache) withLock(f func() error) error {
    cache.Lock()
    defer cache.Unlock()
    return f()
}

// withRLock runs a function while holding the claimInfoCache rlock.
func (cache *claimInfoCache) withRLock(f func() error) error {
    cache.RLock()
    defer cache.RUnlock()
    return f()
}

// add adds a new claim info object into the claim info cache.
func (cache *claimInfoCache) add(info *ClaimInfo) *ClaimInfo {
    cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
    return info
}

// contains checks to see if a specific claim info object is already in the cache.
func (cache *claimInfoCache) contains(claimName, namespace string) bool {
    _, exists := cache.claimInfo[namespace+"/"+claimName]
    return exists
}

// get gets a specific claim info object from the cache.
func (cache *claimInfoCache) get(claimName, namespace string) (*ClaimInfo, bool) {
    info, exists := cache.claimInfo[namespace+"/"+claimName]
    return info, exists
}

// delete deletes a specific claim info object from the cache.
func (cache *claimInfoCache) delete(claimName, namespace string) {
    delete(cache.claimInfo, namespace+"/"+claimName)
}

// hasPodReference checks if there is at least one claim
// that is referenced by the pod with the given UID
// This function is used indirectly by the status manager
// to check if pod can enter termination status
func (cache *claimInfoCache) hasPodReference(uid types.UID) bool {
    for _, claimInfo := range cache.claimInfo {
        if claimInfo.hasPodReference(uid) {
            return true
        }
    }
    return false
}

// syncToCheckpoint syncs the full claim info cache state to a checkpoint.
func (cache *claimInfoCache) syncToCheckpoint() error {
    claimInfoStateList := make(state.ClaimInfoStateList, 0, len(cache.claimInfo))
    for _, infoClaim := range cache.claimInfo {
        claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
    }
    checkpoint, err := state.NewCheckpoint(claimInfoStateList)
    if err != nil {
        return err
    }
    return cache.checkpointer.Store(checkpoint)
}

// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
// When the request name is non-empty, only devices relevant for that request
// are returned.
func (info *ClaimInfo) cdiDevicesAsList(requestName string) []kubecontainer.CDIDevice {
    var cdiDevices []kubecontainer.CDIDevice
    for _, driverData := range info.DriverState {
        for _, device := range driverData.Devices {
            if requestName == "" || len(device.RequestNames) == 0 || slices.Contains(device.RequestNames, requestName) {
                for _, cdiDeviceID := range device.CDIDeviceIDs {
                    cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: cdiDeviceID})
                }
            }
        }
    }
    return cdiDevices
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/manager.go (553 lines, generated, vendored, new file)
@@ -0,0 +1,553 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
    "context"
    "fmt"
    "strconv"
    "time"

    v1 "k8s.io/api/core/v1"
    resourceapi "k8s.io/api/resource/v1beta1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/sets"
    "k8s.io/apimachinery/pkg/util/wait"
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/dynamic-resource-allocation/resourceclaim"
    "k8s.io/klog/v2"
    drapb "k8s.io/kubelet/pkg/apis/dra/v1beta1"
    dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
    "k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
    "k8s.io/kubernetes/pkg/kubelet/config"
    kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    "k8s.io/kubernetes/pkg/kubelet/metrics"
    "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)

// draManagerStateFileName is the file name where dra manager stores its state
const draManagerStateFileName = "dra_manager_state"

// defaultReconcilePeriod is the default reconciliation period to keep all claim info state in sync.
const defaultReconcilePeriod = 60 * time.Second

// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod

// GetNodeFunc is a function that returns the node object using the kubelet's node lister.
type GetNodeFunc func() (*v1.Node, error)

// ManagerImpl is the structure in charge of managing DRA drivers.
type ManagerImpl struct {
    // cache contains cached claim info
    cache *claimInfoCache

    // reconcilePeriod is the duration between calls to reconcileLoop.
    reconcilePeriod time.Duration

    // activePods is a method for listing active pods on the node
    // so all claim info state can be updated in the reconciliation loop.
    activePods ActivePodsFunc

    // sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
    // We use it to determine when we can treat pods as inactive and react appropriately.
    sourcesReady config.SourcesReady

    // KubeClient reference
    kubeClient clientset.Interface

    // getNode is a function that returns the node object using the kubelet's node lister.
    getNode GetNodeFunc
}

// NewManagerImpl creates a new manager.
func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, nodeName types.NodeName) (*ManagerImpl, error) {
    claimInfoCache, err := newClaimInfoCache(stateFileDirectory, draManagerStateFileName)
    if err != nil {
        return nil, fmt.Errorf("failed to create claimInfo cache: %w", err)
    }

    // TODO: for now the reconcile period is not configurable.
    // We should consider making it configurable in the future.
    reconcilePeriod := defaultReconcilePeriod

    manager := &ManagerImpl{
        cache:           claimInfoCache,
        kubeClient:      kubeClient,
        reconcilePeriod: reconcilePeriod,
        activePods:      nil,
        sourcesReady:    nil,
    }

    return manager, nil
}

func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
    return cache.PluginHandler(dra.NewRegistrationHandler(m.kubeClient, m.getNode))
}

// Start starts the reconcile loop of the manager.
func (m *ManagerImpl) Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error {
    m.activePods = activePods
    m.getNode = getNode
    m.sourcesReady = sourcesReady
    go wait.UntilWithContext(ctx, func(ctx context.Context) { m.reconcileLoop(ctx) }, m.reconcilePeriod)
    return nil
}

// reconcileLoop ensures that any stale state in the manager's claimInfoCache gets periodically reconciled.
func (m *ManagerImpl) reconcileLoop(ctx context.Context) {
    logger := klog.FromContext(ctx)
    // Only once all sources are ready do we attempt to reconcile.
    // This ensures that the call to m.activePods() below will succeed with
    // the actual active pods list.
    if m.sourcesReady == nil || !m.sourcesReady.AllReady() {
        return
    }

    // Get the full list of active pods.
    activePods := sets.New[string]()
    for _, p := range m.activePods() {
        activePods.Insert(string(p.UID))
    }

    // Get the list of inactive pods still referenced by any claimInfos.
    type podClaims struct {
        uid        types.UID
        namespace  string
        claimNames []string
    }
    inactivePodClaims := make(map[string]*podClaims)
    m.cache.RLock()
    for _, claimInfo := range m.cache.claimInfo {
        for podUID := range claimInfo.PodUIDs {
            if activePods.Has(podUID) {
                continue
            }
            if inactivePodClaims[podUID] == nil {
                inactivePodClaims[podUID] = &podClaims{
                    uid:        types.UID(podUID),
                    namespace:  claimInfo.Namespace,
                    claimNames: []string{},
                }
            }
            inactivePodClaims[podUID].claimNames = append(inactivePodClaims[podUID].claimNames, claimInfo.ClaimName)
        }
    }
    m.cache.RUnlock()

    // Loop through all inactive pods and call UnprepareResources on them.
    for _, podClaims := range inactivePodClaims {
        if err := m.unprepareResources(ctx, podClaims.uid, podClaims.namespace, podClaims.claimNames); err != nil {
            logger.Info("Unpreparing pod resources in reconcile loop failed, will retry", "podUID", podClaims.uid, "err", err)
        }
    }
}

// PrepareResources attempts to prepare all of the required resources
// for the input container, issue NodePrepareResources rpc requests
// for each new resource requirement, process their responses and update the cached
// containerResources on success.
func (m *ManagerImpl) PrepareResources(ctx context.Context, pod *v1.Pod) error {
    startTime := time.Now()
    err := m.prepareResources(ctx, pod)
    metrics.DRAOperationsDuration.WithLabelValues("PrepareResources", strconv.FormatBool(err == nil)).Observe(time.Since(startTime).Seconds())
    return err
}

func (m *ManagerImpl) prepareResources(ctx context.Context, pod *v1.Pod) error {
    logger := klog.FromContext(ctx)
    batches := make(map[string][]*drapb.Claim)
    resourceClaims := make(map[types.UID]*resourceapi.ResourceClaim)
    for i := range pod.Spec.ResourceClaims {
        podClaim := &pod.Spec.ResourceClaims[i]
        logger.V(3).Info("Processing resource", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
        claimName, mustCheckOwner, err := resourceclaim.Name(pod, podClaim)
        if err != nil {
            return fmt.Errorf("prepare resource claim: %w", err)
        }

        if claimName == nil {
            // Nothing to do.
            logger.V(5).Info("No need to prepare resources, no claim generated", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
            continue
        }
        // Query claim object from the API server
        resourceClaim, err := m.kubeClient.ResourceV1beta1().ResourceClaims(pod.Namespace).Get(
            ctx,
            *claimName,
            metav1.GetOptions{})
        if err != nil {
            return fmt.Errorf("failed to fetch ResourceClaim %s referenced by pod %s: %w", *claimName, pod.Name, err)
        }

        if mustCheckOwner {
            if err = resourceclaim.IsForPod(pod, resourceClaim); err != nil {
                return err
            }
        }

        // Check if pod is in the ReservedFor for the claim
        if !resourceclaim.IsReservedForPod(pod, resourceClaim) {
            return fmt.Errorf("pod %s(%s) is not allowed to use resource claim %s(%s)",
                pod.Name, pod.UID, *claimName, resourceClaim.UID)
        }

        // Atomically perform some operations on the claimInfo cache.
        err = m.cache.withLock(func() error {
            // Get a reference to the claim info for this claim from the cache.
            // If there isn't one yet, then add it to the cache.
            claimInfo, exists := m.cache.get(resourceClaim.Name, resourceClaim.Namespace)
            if !exists {
                ci, err := newClaimInfoFromClaim(resourceClaim)
                if err != nil {
                    return fmt.Errorf("claim %s: %w", klog.KObj(resourceClaim), err)
                }
                claimInfo = m.cache.add(ci)
                logger.V(6).Info("Created new claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
            } else {
                logger.V(6).Info("Found existing claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
            }

            // Add a reference to the current pod in the claim info.
            claimInfo.addPodReference(pod.UID)

            // Checkpoint to ensure all claims we plan to prepare are tracked.
            // If something goes wrong and the newly referenced pod gets
            // deleted without a successful prepare call, we will catch
            // that in the reconcile loop and take the appropriate action.
            if err := m.cache.syncToCheckpoint(); err != nil {
                return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
            }

            // If this claim is already prepared, there is no need to prepare it again.
            if claimInfo.isPrepared() {
                logger.V(5).Info("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
                return nil
            }

            // This saved claim will be used to update ClaimInfo cache
            // after NodePrepareResources GRPC succeeds
            resourceClaims[claimInfo.ClaimUID] = resourceClaim

            // Loop through all drivers and prepare for calling NodePrepareResources.
            claim := &drapb.Claim{
                Namespace: claimInfo.Namespace,
                UID:       string(claimInfo.ClaimUID),
                Name:      claimInfo.ClaimName,
            }
            for driverName := range claimInfo.DriverState {
                batches[driverName] = append(batches[driverName], claim)
            }

            return nil
        })
        if err != nil {
            return fmt.Errorf("locked cache operation: %w", err)
        }
    }

    // Call NodePrepareResources for all claims in each batch.
    // If there is any error, processing gets aborted.
    // We could try to continue, but that would make the code more complex.
    for driverName, claims := range batches {
        // Call NodePrepareResources RPC for all resource handles.
        client, err := dra.NewDRAPluginClient(driverName)
        if err != nil {
            return fmt.Errorf("failed to get gRPC client for driver %s: %w", driverName, err)
        }
        response, err := client.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{Claims: claims})
        if err != nil {
            // General error unrelated to any particular claim.
            return fmt.Errorf("NodePrepareResources failed: %w", err)
        }
        for claimUID, result := range response.Claims {
            reqClaim := lookupClaimRequest(claims, claimUID)
            if reqClaim == nil {
                return fmt.Errorf("NodePrepareResources returned result for unknown claim UID %s", claimUID)
            }
            if result.GetError() != "" {
                return fmt.Errorf("NodePrepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
            }

            claim := resourceClaims[types.UID(claimUID)]

            // Add the prepared CDI devices to the claim info
            err := m.cache.withLock(func() error {
                info, exists := m.cache.get(claim.Name, claim.Namespace)
                if !exists {
                    return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
                }
                for _, device := range result.GetDevices() {
                    info.addDevice(driverName, state.Device{PoolName: device.PoolName, DeviceName: device.DeviceName, RequestNames: device.RequestNames, CDIDeviceIDs: device.CDIDeviceIDs})
                }
                return nil
            })
            if err != nil {
                return fmt.Errorf("locked cache operation: %w", err)
            }
        }

        unfinished := len(claims) - len(response.Claims)
        if unfinished != 0 {
            return fmt.Errorf("NodePrepareResources left out %d claims", unfinished)
        }
    }

    // Atomically perform some operations on the claimInfo cache.
    err := m.cache.withLock(func() error {
        // Mark all pod claims as prepared.
        for _, claim := range resourceClaims {
            info, exists := m.cache.get(claim.Name, claim.Namespace)
            if !exists {
                return fmt.Errorf("unable to get claim info for claim %s in namespace %s", claim.Name, claim.Namespace)
            }
            info.setPrepared()
        }

        // Checkpoint to ensure all prepared claims are tracked with their list
        // of CDI devices attached.
        if err := m.cache.syncToCheckpoint(); err != nil {
            return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
        }

        return nil
    })
    if err != nil {
        return fmt.Errorf("locked cache operation: %w", err)
    }

    return nil
}

func lookupClaimRequest(claims []*drapb.Claim, claimUID string) *drapb.Claim {
    for _, claim := range claims {
        if claim.UID == claimUID {
            return claim
        }
    }
    return nil
}

// GetResources gets a ContainerInfo object from the claimInfo cache.
// This information is used by the caller to update a container config.
func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error) {
    cdiDevices := []kubecontainer.CDIDevice{}

    for i := range pod.Spec.ResourceClaims {
        podClaim := &pod.Spec.ResourceClaims[i]
        claimName, _, err := resourceclaim.Name(pod, podClaim)
        if err != nil {
            return nil, fmt.Errorf("list resource claims: %w", err)
        }
        // The claim name might be nil if no underlying resource claim
        // was generated for the referenced claim. There are valid use
        // cases when this might happen, so we simply skip it.
        if claimName == nil {
            continue
        }
        for _, claim := range container.Resources.Claims {
            if podClaim.Name != claim.Name {
                continue
            }

            err := m.cache.withRLock(func() error {
                claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
                if !exists {
                    return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
                }

                // As of Kubernetes 1.31, CDI device IDs are not passed via annotations anymore.
                cdiDevices = append(cdiDevices, claimInfo.cdiDevicesAsList(claim.Request)...)

                return nil
            })
            if err != nil {
                return nil, fmt.Errorf("locked cache operation: %w", err)
            }
        }
    }
    return &ContainerInfo{CDIDevices: cdiDevices}, nil
}

// UnprepareResources calls a driver's NodeUnprepareResource API for each resource claim owned by a pod.
// This function is idempotent and may be called multiple times against the same pod.
// As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
// already been successfully unprepared.
func (m *ManagerImpl) UnprepareResources(ctx context.Context, pod *v1.Pod) error {
    var err error = nil
    defer func(startTime time.Time) {
        metrics.DRAOperationsDuration.WithLabelValues("UnprepareResources", strconv.FormatBool(err != nil)).Observe(time.Since(startTime).Seconds())
    }(time.Now())
    var claimNames []string
    for i := range pod.Spec.ResourceClaims {
        claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
        if err != nil {
            return fmt.Errorf("unprepare resource claim: %w", err)
        }
        // The claim name might be nil if no underlying resource claim
        // was generated for the referenced claim. There are valid use
        // cases when this might happen, so we simply skip it.
        if claimName == nil {
            continue
        }
        claimNames = append(claimNames, *claimName)
    }
    err = m.unprepareResources(ctx, pod.UID, pod.Namespace, claimNames)
    return err
}

func (m *ManagerImpl) unprepareResources(ctx context.Context, podUID types.UID, namespace string, claimNames []string) error {
    logger := klog.FromContext(ctx)
    batches := make(map[string][]*drapb.Claim)
    claimNamesMap := make(map[types.UID]string)
    for _, claimName := range claimNames {
        // Atomically perform some operations on the claimInfo cache.
        err := m.cache.withLock(func() error {
            // Get the claim info from the cache
            claimInfo, exists := m.cache.get(claimName, namespace)

            // Skip calling NodeUnprepareResource if claim info is not cached
            if !exists {
                return nil
            }

            // Skip calling NodeUnprepareResource if other pods are still referencing it
            if len(claimInfo.PodUIDs) > 1 {
                // We delay checkpointing of this change until
                // UnprepareResources returns successfully. It is OK to do
                // this because we will only return successfully from this call
                // if the checkpoint has succeeded. That means if the kubelet
                // is ever restarted before this checkpoint succeeds, we will
                // simply call into this (idempotent) function again.
                claimInfo.deletePodReference(podUID)
                return nil
            }

            // This claimInfo name will be used to update ClaimInfo cache
            // after NodeUnprepareResources GRPC succeeds
            claimNamesMap[claimInfo.ClaimUID] = claimInfo.ClaimName

            // Loop through all drivers and prepare for calling NodeUnprepareResources.
            claim := &drapb.Claim{
                Namespace: claimInfo.Namespace,
                UID:       string(claimInfo.ClaimUID),
                Name:      claimInfo.ClaimName,
            }
            for driverName := range claimInfo.DriverState {
                batches[driverName] = append(batches[driverName], claim)
            }

            return nil
        })
        if err != nil {
            return fmt.Errorf("locked cache operation: %w", err)
        }
    }

    // Call NodeUnprepareResources for all claims in each batch.
    // If there is any error, processing gets aborted.
    // We could try to continue, but that would make the code more complex.
    for driverName, claims := range batches {
        // Call NodeUnprepareResources RPC for all resource handles.
        client, err := dra.NewDRAPluginClient(driverName)
        if err != nil {
            return fmt.Errorf("get gRPC client for DRA driver %s: %w", driverName, err)
        }
        response, err := client.NodeUnprepareResources(ctx, &drapb.NodeUnprepareResourcesRequest{Claims: claims})
        if err != nil {
            // General error unrelated to any particular claim.
            return fmt.Errorf("NodeUnprepareResources failed: %w", err)
        }

        for claimUID, result := range response.Claims {
            reqClaim := lookupClaimRequest(claims, claimUID)
            if reqClaim == nil {
                return fmt.Errorf("NodeUnprepareResources returned result for unknown claim UID %s", claimUID)
            }
            if result.GetError() != "" {
                return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error)
            }
        }

        unfinished := len(claims) - len(response.Claims)
        if unfinished != 0 {
            return fmt.Errorf("NodeUnprepareResources left out %d claims", unfinished)
        }
    }

    // Atomically perform some operations on the claimInfo cache.
    err := m.cache.withLock(func() error {
        // Delete all claimInfos from the cache that have just been unprepared.
        for _, claimName := range claimNamesMap {
            claimInfo, _ := m.cache.get(claimName, namespace)
            m.cache.delete(claimName, namespace)
            logger.V(6).Info("Deleted claim info cache entry", "claim", klog.KRef(namespace, claimName), "claimInfoEntry", claimInfo)
        }

        // Atomically sync the cache back to the checkpoint.
        if err := m.cache.syncToCheckpoint(); err != nil {
            return fmt.Errorf("failed to checkpoint claimInfo state: %w", err)
        }
        return nil
    })
    if err != nil {
        return fmt.Errorf("locked cache operation: %w", err)
    }

    return nil
}

// PodMightNeedToUnprepareResources returns true if the pod might need to
// unprepare resources
func (m *ManagerImpl) PodMightNeedToUnprepareResources(uid types.UID) bool {
    m.cache.Lock()
    defer m.cache.Unlock()
    return m.cache.hasPodReference(uid)
}

// GetContainerClaimInfos gets Container's ClaimInfo
func (m *ManagerImpl) GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error) {
    claimInfos := make([]*ClaimInfo, 0, len(pod.Spec.ResourceClaims))

    for i, podResourceClaim := range pod.Spec.ResourceClaims {
        claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
        if err != nil {
            return nil, fmt.Errorf("determine resource claim information: %w", err)
        }

        for _, claim := range container.Resources.Claims {
            if podResourceClaim.Name != claim.Name {
                continue
            }

            err := m.cache.withRLock(func() error {
                claimInfo, exists := m.cache.get(*claimName, pod.Namespace)
                if !exists {
                    return fmt.Errorf("unable to get claim info for claim %s in namespace %s", *claimName, pod.Namespace)
                }
                claimInfos = append(claimInfos, claimInfo.DeepCopy())
                return nil
            })
            if err != nil {
                return nil, fmt.Errorf("locked cache operation: %w", err)
            }
        }
    }
    return claimInfos, nil
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugin.go (181 lines, generated, vendored, new file)
@@ -0,0 +1,181 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
    "context"
    "errors"
    "fmt"
    "net"
    "sync"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/connectivity"
    "google.golang.org/grpc/credentials/insecure"
    "google.golang.org/grpc/status"

    "k8s.io/klog/v2"
    drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
    drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
    "k8s.io/kubernetes/pkg/kubelet/metrics"
)

// NewDRAPluginClient returns a wrapper around those gRPC methods of a DRA
// driver kubelet plugin which need to be called by kubelet. The wrapper
// handles gRPC connection management and logging. Connections are reused
// across different NewDRAPluginClient calls.
func NewDRAPluginClient(pluginName string) (*Plugin, error) {
    if pluginName == "" {
        return nil, fmt.Errorf("plugin name is empty")
    }

    existingPlugin := draPlugins.get(pluginName)
    if existingPlugin == nil {
        return nil, fmt.Errorf("plugin name %s not found in the list of registered DRA plugins", pluginName)
    }

    return existingPlugin, nil
}

type Plugin struct {
    name          string
    backgroundCtx context.Context
    cancel        func(cause error)

    mutex             sync.Mutex
    conn              *grpc.ClientConn
    endpoint          string
    chosenService     string // e.g. drapbv1beta1.DRAPluginService
    clientCallTimeout time.Duration
}

func (p *Plugin) getOrCreateGRPCConn() (*grpc.ClientConn, error) {
    p.mutex.Lock()
    defer p.mutex.Unlock()

    if p.conn != nil {
        return p.conn, nil
    }

    ctx := p.backgroundCtx
    logger := klog.FromContext(ctx)

    network := "unix"
    logger.V(4).Info("Creating new gRPC connection", "protocol", network, "endpoint", p.endpoint)
    // grpc.Dial is deprecated. grpc.NewClient should be used instead.
    // For now this gets ignored because this function is meant to establish
    // the connection, with the one second timeout below. Perhaps that
    // approach should be reconsidered?
    //nolint:staticcheck
    conn, err := grpc.Dial(
        p.endpoint,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
        grpc.WithContextDialer(func(ctx context.Context, target string) (net.Conn, error) {
            return (&net.Dialer{}).DialContext(ctx, network, target)
        }),
        grpc.WithChainUnaryInterceptor(newMetricsInterceptor(p.name)),
    )
    if err != nil {
        return nil, err
    }

    ctx, cancel := context.WithTimeout(context.Background(), time.Second)
    defer cancel()

    if ok := conn.WaitForStateChange(ctx, connectivity.Connecting); !ok {
        return nil, errors.New("timed out waiting for gRPC connection to be ready")
    }

    p.conn = conn
    return p.conn, nil
}

func (p *Plugin) NodePrepareResources(
    ctx context.Context,
    req *drapbv1beta1.NodePrepareResourcesRequest,
    opts ...grpc.CallOption,
) (*drapbv1beta1.NodePrepareResourcesResponse, error) {
    logger := klog.FromContext(ctx)
    logger.V(4).Info("Calling NodePrepareResources rpc", "request", req)

    conn, err := p.getOrCreateGRPCConn()
    if err != nil {
        return nil, err
    }

    ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
    defer cancel()

    var response *drapbv1beta1.NodePrepareResourcesResponse
    switch p.chosenService {
    case drapbv1beta1.DRAPluginService:
        nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
        response, err = nodeClient.NodePrepareResources(ctx, req)
    case drapbv1alpha4.NodeService:
        nodeClient := drapbv1alpha4.NewNodeClient(conn)
        response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodePrepareResources(ctx, req)
    default:
        // Shouldn't happen, validateSupportedServices should only
        // return services we support here.
        return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
    }
    logger.V(4).Info("Done calling NodePrepareResources rpc", "response", response, "err", err)
    return response, err
}

func (p *Plugin) NodeUnprepareResources(
    ctx context.Context,
    req *drapbv1beta1.NodeUnprepareResourcesRequest,
    opts ...grpc.CallOption,
) (*drapbv1beta1.NodeUnprepareResourcesResponse, error) {
    logger := klog.FromContext(ctx)
    logger.V(4).Info("Calling NodeUnprepareResource rpc", "request", req)

    conn, err := p.getOrCreateGRPCConn()
    if err != nil {
        return nil, err
    }

    ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
    defer cancel()

    var response *drapbv1beta1.NodeUnprepareResourcesResponse
    switch p.chosenService {
    case drapbv1beta1.DRAPluginService:
        nodeClient := drapbv1beta1.NewDRAPluginClient(conn)
        response, err = nodeClient.NodeUnprepareResources(ctx, req)
    case drapbv1alpha4.NodeService:
        nodeClient := drapbv1alpha4.NewNodeClient(conn)
        response, err = drapbv1alpha4.V1Alpha4ClientWrapper{NodeClient: nodeClient}.NodeUnprepareResources(ctx, req)
    default:
        // Shouldn't happen, validateSupportedServices should only
        // return services we support here.
        return nil, fmt.Errorf("internal error: unsupported chosen service: %q", p.chosenService)
    }
    logger.V(4).Info("Done calling NodeUnprepareResources rpc", "response", response, "err", err)
    return response, err
}

func newMetricsInterceptor(pluginName string) grpc.UnaryClientInterceptor {
    return func(ctx context.Context, method string, req, reply any, conn *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
        start := time.Now()
        err := invoker(ctx, method, req, reply, conn, opts...)
        metrics.DRAGRPCOperationsDuration.WithLabelValues(pluginName, method, status.Code(err).String()).Observe(time.Since(start).Seconds())
        return err
    }
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/plugins_store.go (79 lines, generated, vendored, new file)
@@ -0,0 +1,79 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
    "errors"
    "sync"
)

// PluginsStore holds a list of DRA Plugins.
type pluginsStore struct {
    sync.RWMutex
    store map[string]*Plugin
}

// draPlugins map keeps track of all registered DRA plugins on the node
// and their corresponding sockets.
var draPlugins = &pluginsStore{}

// Get lets you retrieve a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) get(pluginName string) *Plugin {
    s.RLock()
    defer s.RUnlock()

    return s.store[pluginName]
}

// Set lets you save a DRA Plugin to the list and give it a specific name.
// This method is protected by a mutex.
func (s *pluginsStore) add(p *Plugin) (replacedPlugin *Plugin, replaced bool) {
    s.Lock()
    defer s.Unlock()

    if s.store == nil {
        s.store = make(map[string]*Plugin)
    }

    replacedPlugin, exists := s.store[p.name]
    s.store[p.name] = p

    if replacedPlugin != nil && replacedPlugin.cancel != nil {
        replacedPlugin.cancel(errors.New("plugin got replaced"))
    }

    return replacedPlugin, exists
}

// Delete lets you delete a DRA Plugin by name.
// This method is protected by a mutex.
func (s *pluginsStore) delete(pluginName string) *Plugin {
    s.Lock()
    defer s.Unlock()

    p, exists := s.store[pluginName]
    if !exists {
        return nil
    }
    if p.cancel != nil {
        p.cancel(errors.New("plugin got removed"))
    }
    delete(s.store, pluginName)

    return p
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin/registration.go (249 lines, generated, vendored, new file)
@@ -0,0 +1,249 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plugin

import (
    "context"
    "errors"
    "fmt"
    "slices"
    "time"

    v1 "k8s.io/api/core/v1"
    resourceapi "k8s.io/api/resource/v1beta1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/kubernetes"
    "k8s.io/klog/v2"
    drapbv1alpha4 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
    drapbv1beta1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
    "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)

// defaultClientCallTimeout is the default amount of time that a DRA driver has
// to respond to any of the gRPC calls. kubelet uses this value by passing nil
// to RegisterPlugin. Some tests use a different, usually shorter timeout to
// speed up testing.
//
// This is half of the kubelet retry period (according to
// https://github.com/kubernetes/kubernetes/commit/0449cef8fd5217d394c5cd331d852bd50983e6b3).
const defaultClientCallTimeout = 45 * time.Second

// RegistrationHandler is the handler which is fed to the pluginwatcher API.
type RegistrationHandler struct {
    // backgroundCtx is used for all future activities of the handler.
    // This is necessary because it implements APIs which don't
    // provide a context.
    backgroundCtx context.Context
    kubeClient    kubernetes.Interface
    getNode       func() (*v1.Node, error)
}

var _ cache.PluginHandler = &RegistrationHandler{}

// NewRegistrationHandler returns new registration handler.
//
// Must only be called once per process because it manages global state.
// If a kubeClient is provided, then it synchronizes ResourceSlices
// with the resource information provided by plugins.
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
    handler := &RegistrationHandler{
        // The context and thus logger should come from the caller.
        backgroundCtx: klog.NewContext(context.TODO(), klog.LoggerWithName(klog.TODO(), "DRA registration handler")),
        kubeClient:    kubeClient,
        getNode:       getNode,
    }

    // When kubelet starts up, no DRA driver has registered yet. None of
    // the drivers are usable until they come back, which might not happen
    // at all. Therefore it is better to not advertise any local resources
    // because pods could get stuck on the node waiting for the driver
    // to start up.
    //
    // This has to run in the background.
    go handler.wipeResourceSlices("")

    return handler
}

// wipeResourceSlices deletes ResourceSlices of the node, optionally just for a specific driver.
func (h *RegistrationHandler) wipeResourceSlices(driver string) {
    if h.kubeClient == nil {
        return
    }
    ctx := h.backgroundCtx
    logger := klog.FromContext(ctx)

    backoff := wait.Backoff{
        Duration: time.Second,
        Factor:   2,
        Jitter:   0.2,
        Cap:      5 * time.Minute,
        Steps:    100,
    }

    // Error logging is done inside the loop. Context cancellation doesn't get logged.
    _ = wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) {
        node, err := h.getNode()
        if apierrors.IsNotFound(err) {
            return false, nil
        }
        if err != nil {
            logger.Error(err, "Unexpected error checking for node")
            return false, nil
        }
        fieldSelector := fields.Set{resourceapi.ResourceSliceSelectorNodeName: node.Name}
        if driver != "" {
            fieldSelector[resourceapi.ResourceSliceSelectorDriver] = driver
        }

        err = h.kubeClient.ResourceV1beta1().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: fieldSelector.String()})
        switch {
        case err == nil:
            logger.V(3).Info("Deleted ResourceSlices", "fieldSelector", fieldSelector)
            return true, nil
        case apierrors.IsUnauthorized(err):
            // This can happen while kubelet is still figuring out
            // its credentials.
            logger.V(5).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
            return false, nil
        default:
            // Log and retry for other errors.
            logger.V(3).Info("Deleting ResourceSlice failed, retrying", "fieldSelector", fieldSelector, "err", err)
            return false, nil
        }
    })
}

// RegisterPlugin is called when a plugin can be registered.
//
// DRA uses the version array in the registration API to enumerate all gRPC
// services that the plugin provides, using the "<gRPC package name>.<service
// name>" format (e.g. "v1beta1.DRAPlugin"). This allows kubelet to determine
// in advance which version to use resp. which optional services the plugin
// supports.
func (h *RegistrationHandler) RegisterPlugin(pluginName string, endpoint string, supportedServices []string, pluginClientTimeout *time.Duration) error {
    // Prepare a context with its own logger for the plugin.
    //
    // The lifecycle of the plugin's background activities is tied to our
    // root context, so canceling that will also cancel the plugin.
    //
    // The logger injects the plugin name as additional value
    // into all log output related to the plugin.
    ctx := h.backgroundCtx
    logger := klog.FromContext(ctx)
    logger = klog.LoggerWithValues(logger, "pluginName", pluginName)
    ctx = klog.NewContext(ctx, logger)

    logger.V(3).Info("Register new DRA plugin", "endpoint", endpoint)

    chosenService, err := h.validateSupportedServices(pluginName, supportedServices)
    if err != nil {
        return fmt.Errorf("version check of plugin %s failed: %w", pluginName, err)
    }

    var timeout time.Duration
    if pluginClientTimeout == nil {
        timeout = defaultClientCallTimeout
    } else {
        timeout = *pluginClientTimeout
    }

    ctx, cancel := context.WithCancelCause(ctx)

    pluginInstance := &Plugin{
        name:              pluginName,
        backgroundCtx:     ctx,
        cancel:            cancel,
        conn:              nil,
        endpoint:          endpoint,
        chosenService:     chosenService,
        clientCallTimeout: timeout,
    }

    // Storing endpoint of newly registered DRA Plugin into the map, where plugin name will be the key
    // all other DRA components will be able to get the actual socket of DRA plugins by its name.

    if oldPlugin, replaced := draPlugins.add(pluginInstance); replaced {
        logger.V(1).Info("DRA plugin already registered, the old plugin was replaced and will be forgotten by the kubelet till the next kubelet restart", "oldEndpoint", oldPlugin.endpoint)
    }

    return nil
}

// validateSupportedServices identifies the highest supported gRPC service for
// NodePrepareResources and NodeUnprepareResources and returns its name
// (e.g. [drapbv1beta1.DRAPluginService]). An error is returned if the plugin
// is unusable.
func (h *RegistrationHandler) validateSupportedServices(pluginName string, supportedServices []string) (string, error) {
    if len(supportedServices) == 0 {
        return "", errors.New("empty list of supported gRPC services (aka supported versions)")
    }

    // Pick most recent version if available.
    chosenService := ""
    for _, service := range []string{
        // Sorted by most recent first, oldest last.
        drapbv1beta1.DRAPluginService,
        drapbv1alpha4.NodeService,
    } {
        if slices.Contains(supportedServices, service) {
            chosenService = service
            break
        }
    }

    // Fall back to alpha if necessary because
    // plugins at that time didn't advertise gRPC services.
    if chosenService == "" {
        chosenService = drapbv1alpha4.NodeService
    }

    return chosenService, nil
}

// DeRegisterPlugin is called when a plugin has removed its socket,
// signaling it is no longer available.
func (h *RegistrationHandler) DeRegisterPlugin(pluginName string) {
    if p := draPlugins.delete(pluginName); p != nil {
        logger := klog.FromContext(p.backgroundCtx)
        logger.V(3).Info("Deregister DRA plugin", "endpoint", p.endpoint)

        // Clean up the ResourceSlices for the deleted Plugin since it
        // may have died without doing so itself and might never come
        // back.
        go h.wipeResourceSlices(pluginName)

        return
    }

    logger := klog.FromContext(h.backgroundCtx)
    logger.V(3).Info("Deregister DRA plugin not necessary, was already removed")
}

// ValidatePlugin is called by kubelet's plugin watcher upon detection
// of a new registration socket opened by DRA plugin.
func (h *RegistrationHandler) ValidatePlugin(pluginName string, endpoint string, supportedServices []string) error {
    _, err := h.validateSupportedServices(pluginName, supportedServices)
    if err != nil {
        return fmt.Errorf("invalid versions of plugin %s: %w", pluginName, err)
    }

    return err
}

vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpoint.go (107 lines, generated, vendored, new file)
@@ -0,0 +1,107 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
    "encoding/json"
    "hash/crc32"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)

const (
    CheckpointAPIGroup   = "checkpoint.dra.kubelet.k8s.io"
    CheckpointKind       = "DRACheckpoint"
    CheckpointAPIVersion = CheckpointAPIGroup + "/v1"
)

// Checkpoint represents a structure to store DRA checkpoint data
type Checkpoint struct {
    // Data is a JSON serialized checkpoint data
    Data string
    // Checksum is a checksum of Data
    Checksum uint32
}

type CheckpointData struct {
    metav1.TypeMeta
    ClaimInfoStateList ClaimInfoStateList
}

// NewCheckpoint creates a new checkpoint from a list of claim info states
func NewCheckpoint(data ClaimInfoStateList) (*Checkpoint, error) {
    cpData := &CheckpointData{
        TypeMeta: metav1.TypeMeta{
            Kind:       CheckpointKind,
            APIVersion: CheckpointAPIVersion,
        },
        ClaimInfoStateList: data,
    }

    cpDataBytes, err := json.Marshal(cpData)
    if err != nil {
        return nil, err
    }

    cp := &Checkpoint{
        Data:     string(cpDataBytes),
        Checksum: crc32.ChecksumIEEE(cpDataBytes),
    }

    return cp, nil
}

// MarshalCheckpoint marshals checkpoint to JSON
func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) {
    return json.Marshal(cp)
}

// UnmarshalCheckpoint unmarshals checkpoint from JSON
// and verifies its data checksum
func (cp *Checkpoint) UnmarshalCheckpoint(blob []byte) error {
    if err := json.Unmarshal(blob, cp); err != nil {
        return err
    }

    // verify checksum
    if err := cp.VerifyChecksum(); err != nil {
        return err
    }

    return nil
}

// VerifyChecksum verifies that current checksum
// of checkpointed Data is valid
func (cp *Checkpoint) VerifyChecksum() error {
    expectedCS := crc32.ChecksumIEEE([]byte(cp.Data))
    if expectedCS != cp.Checksum {
        return &errors.CorruptCheckpointError{ActualCS: uint64(cp.Checksum), ExpectedCS: uint64(expectedCS)}
    }
    return nil
}

// GetClaimInfoStateList returns list of claim info states from checkpoint
func (cp *Checkpoint) GetClaimInfoStateList() (ClaimInfoStateList, error) {
    var data CheckpointData
    if err := json.Unmarshal([]byte(cp.Data), &data); err != nil {
        return nil, err
    }

    return data.ClaimInfoStateList, nil
}

98
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpointer.go
generated
vendored
Normal file
98
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/checkpointer.go
generated
vendored
Normal file
@ -0,0 +1,98 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	"errors"
	"fmt"
	"sync"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)

type Checkpointer interface {
	GetOrCreate() (*Checkpoint, error)
	Store(*Checkpoint) error
}

type checkpointer struct {
	sync.RWMutex
	checkpointManager checkpointmanager.CheckpointManager
	checkpointName    string
}

// NewCheckpointer creates new checkpointer for keeping track of claim info with checkpoint backend
func NewCheckpointer(stateDir, checkpointName string) (Checkpointer, error) {
	if len(checkpointName) == 0 {
		return nil, fmt.Errorf("received empty string instead of checkpointName")
	}

	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize checkpoint manager: %w", err)
	}

	checkpointer := &checkpointer{
		checkpointManager: checkpointManager,
		checkpointName:    checkpointName,
	}

	return checkpointer, nil
}

// GetOrCreate gets list of claim info states from a checkpoint
// or creates empty list if checkpoint doesn't exist
func (sc *checkpointer) GetOrCreate() (*Checkpoint, error) {
	sc.Lock()
	defer sc.Unlock()

	checkpoint, err := NewCheckpoint(nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create new checkpoint: %w", err)
	}

	err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
	if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) {
		err = sc.store(checkpoint)
		if err != nil {
			return nil, fmt.Errorf("failed to store checkpoint %v: %w", sc.checkpointName, err)
		}
		return checkpoint, nil
	}
	if err != nil {
		return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
	}

	return checkpoint, nil
}

// Store stores checkpoint to the file
func (sc *checkpointer) Store(checkpoint *Checkpoint) error {
	sc.Lock()
	defer sc.Unlock()

	return sc.store(checkpoint)
}

// store saves state to a checkpoint, caller is responsible for locking
func (sc *checkpointer) store(checkpoint *Checkpoint) error {
	if err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint); err != nil {
		return fmt.Errorf("could not save checkpoint %s: %w", sc.checkpointName, err)
	}
	return nil
}
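Below is a hedged usage sketch of the Checkpointer API above; the state directory and checkpoint file name are illustrative assumptions, not values taken from this commit.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

func main() {
	// Hypothetical state directory and checkpoint name; the real kubelet
	// supplies its own root directory and file name.
	checkpointer, err := state.NewCheckpointer("/tmp/dra-state", "dra_manager_state")
	if err != nil {
		panic(err)
	}

	// GetOrCreate returns the existing checkpoint, or stores and returns an
	// empty one on first use.
	checkpoint, err := checkpointer.GetOrCreate()
	if err != nil {
		panic(err)
	}

	claims, err := checkpoint.GetClaimInfoStateList()
	if err != nil {
		panic(err)
	}
	fmt.Printf("restored %d claim(s) from checkpoint\n", len(claims))

	// After mutating the in-memory claim state, a caller rebuilds the
	// checkpoint and persists it again.
	updated, err := state.NewCheckpoint(claims)
	if err != nil {
		panic(err)
	}
	if err := checkpointer.Store(updated); err != nil {
		panic(err)
	}
}

Locking lives inside the checkpointer, so callers only sequence GetOrCreate and Store; the unexported store exists so GetOrCreate can persist a fresh checkpoint while already holding the lock.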
59
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/state.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
)

type ClaimInfoStateList []ClaimInfoState

// +k8s:deepcopy-gen=true
type ClaimInfoState struct {
	// ClaimUID is the UID of a resource claim
	ClaimUID types.UID

	// ClaimName is the name of a resource claim
	ClaimName string

	// Namespace is a claim namespace
	Namespace string

	// PodUIDs is a set of pod UIDs that reference a resource
	PodUIDs sets.Set[string]

	// DriverState contains information about all drivers which have allocation
	// results in the claim, even if they don't provide devices for their results.
	DriverState map[string]DriverState
}

// DriverState is used to store per-device claim info state in a checkpoint
// +k8s:deepcopy-gen=true
type DriverState struct {
	Devices []Device
}

// Device is how a DRA driver described an allocated device in a claim
// to kubelet. RequestName and CDI device IDs are optional.
// +k8s:deepcopy-gen=true
type Device struct {
	PoolName     string
	DeviceName   string
	RequestNames []string
	CDIDeviceIDs []string
}
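To show the shape that actually gets checkpointed, here is a small illustrative construction of a ClaimInfoState; the driver, pool, and device names are hypothetical.

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

func main() {
	// Hypothetical driver, pool, and device names, for illustration only.
	info := state.ClaimInfoState{
		ClaimName: "gpu-claim",
		Namespace: "default",
		PodUIDs:   sets.New("pod-uid-1"),
		DriverState: map[string]state.DriverState{
			"gpu.example.com": {
				Devices: []state.Device{
					{
						PoolName:     "worker-node-pool",
						DeviceName:   "gpu-0",
						RequestNames: []string{"gpu"},
						CDIDeviceIDs: []string{"example.com/gpu=gpu-0"},
					},
				},
			},
		},
	}

	// Roughly the structure that ends up JSON-encoded inside Checkpoint.Data.
	blob, _ := json.MarshalIndent(info, "", "  ")
	fmt.Println(string(blob))
}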
105
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/state/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,105 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated

/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by deepcopy-gen. DO NOT EDIT.

package state

import (
	sets "k8s.io/apimachinery/pkg/util/sets"
)

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfoState) DeepCopyInto(out *ClaimInfoState) {
	*out = *in
	if in.PodUIDs != nil {
		in, out := &in.PodUIDs, &out.PodUIDs
		*out = make(sets.Set[string], len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.DriverState != nil {
		in, out := &in.DriverState, &out.DriverState
		*out = make(map[string]DriverState, len(*in))
		for key, val := range *in {
			(*out)[key] = *val.DeepCopy()
		}
	}
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfoState.
func (in *ClaimInfoState) DeepCopy() *ClaimInfoState {
	if in == nil {
		return nil
	}
	out := new(ClaimInfoState)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Device) DeepCopyInto(out *Device) {
	*out = *in
	if in.RequestNames != nil {
		in, out := &in.RequestNames, &out.RequestNames
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	if in.CDIDeviceIDs != nil {
		in, out := &in.CDIDeviceIDs, &out.CDIDeviceIDs
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Device.
func (in *Device) DeepCopy() *Device {
	if in == nil {
		return nil
	}
	out := new(Device)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DriverState) DeepCopyInto(out *DriverState) {
	*out = *in
	if in.Devices != nil {
		in, out := &in.Devices, &out.Devices
		*out = make([]Device, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverState.
func (in *DriverState) DeepCopy() *DriverState {
	if in == nil {
		return nil
	}
	out := new(DriverState)
	in.DeepCopyInto(out)
	return out
}
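A brief sketch of what the generated DeepCopy buys callers: an independent copy whose reference-typed fields, such as PodUIDs, can be mutated without affecting the original. The names below are illustrative.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

func main() {
	original := state.ClaimInfoState{
		ClaimName: "example-claim", // hypothetical
		PodUIDs:   sets.New("pod-a"),
	}

	copied := original.DeepCopy()
	copied.PodUIDs.Insert("pod-b")

	// The original set is untouched because DeepCopy allocates a new map.
	fmt.Println(original.PodUIDs.Has("pod-b")) // false
	fmt.Println(copied.PodUIDs.Has("pod-b"))   // true
}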
61
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/types.go
generated
vendored
Normal file
@ -0,0 +1,61 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
)

// Manager manages all the DRA resource plugins running on a node.
type Manager interface {
	// GetWatcherHandler returns the plugin handler for the DRA.
	GetWatcherHandler() cache.PluginHandler

	// Start starts the reconcile loop of the manager.
	// This will ensure that all claims are unprepared even if pods get deleted unexpectedly.
	Start(ctx context.Context, activePods ActivePodsFunc, getNode GetNodeFunc, sourcesReady config.SourcesReady) error

	// PrepareResources prepares resources for a pod.
	// It communicates with the DRA resource plugin to prepare resources.
	PrepareResources(ctx context.Context, pod *v1.Pod) error

	// UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
	UnprepareResources(ctx context.Context, pod *v1.Pod) error

	// GetResources gets a ContainerInfo object from the claimInfo cache.
	// This information is used by the caller to update a container config.
	GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error)

	// PodMightNeedToUnprepareResources returns true if the pod with the given UID
	// might need to unprepare resources.
	PodMightNeedToUnprepareResources(UID types.UID) bool

	// GetContainerClaimInfos gets Container ClaimInfo objects
	GetContainerClaimInfos(pod *v1.Pod, container *v1.Container) ([]*ClaimInfo, error)
}

// ContainerInfo contains information required by the runtime to consume prepared resources.
type ContainerInfo struct {
	// CDI Devices for the container
	CDIDevices []kubecontainer.CDIDevice
}
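As a compilable sketch of how a kubelet component might consume the Manager interface, the helper below shows the intended call order; the package, function, and variable names are hypothetical and not part of this commit.

package sketch

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"

	"k8s.io/kubernetes/pkg/kubelet/cm/dra"
)

// prepareAndDescribe prepares a pod's resource claims and then looks up the
// CDI devices that one of its containers should receive.
func prepareAndDescribe(ctx context.Context, m dra.Manager, pod *v1.Pod, container *v1.Container) error {
	if err := m.PrepareResources(ctx, pod); err != nil {
		return fmt.Errorf("prepare resources: %w", err)
	}

	info, err := m.GetResources(pod, container)
	if err != nil {
		return fmt.Errorf("get resources: %w", err)
	}

	// The CDI device list is what the caller folds into the container config
	// handed to the container runtime.
	fmt.Printf("container %q has %d CDI device(s)\n", container.Name, len(info.CDIDevices))
	return nil
}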
39
vendor/k8s.io/kubernetes/pkg/kubelet/cm/dra/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated

/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by deepcopy-gen. DO NOT EDIT.

package dra

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClaimInfo) DeepCopyInto(out *ClaimInfo) {
	*out = *in
	in.ClaimInfoState.DeepCopyInto(&out.ClaimInfoState)
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClaimInfo.
func (in *ClaimInfo) DeepCopy() *ClaimInfo {
	if in == nil {
		return nil
	}
	out := new(ClaimInfo)
	in.DeepCopyInto(out)
	return out
}