Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-14 18:53:35 +00:00)

Commit: vendor update for CSI 0.3.0

This commit updates the vendored Kubernetes device manager packages under
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager.

vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/BUILD (generated, vendored, 18 lines changed)

@@ -15,15 +15,15 @@ go_library(
     deps = [
         "//pkg/apis/core/v1/helper:go_default_library",
         "//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
+        "//pkg/kubelet/checkpointmanager:go_default_library",
+        "//pkg/kubelet/checkpointmanager/errors:go_default_library",
+        "//pkg/kubelet/cm/devicemanager/checkpoint:go_default_library",
         "//pkg/kubelet/config:go_default_library",
         "//pkg/kubelet/container:go_default_library",
         "//pkg/kubelet/lifecycle:go_default_library",
         "//pkg/kubelet/metrics:go_default_library",
-        "//pkg/kubelet/util/store:go_default_library",
-        "//pkg/scheduler/schedulercache:go_default_library",
-        "//pkg/util/filesystem:go_default_library",
+        "//pkg/scheduler/cache:go_default_library",
         "//vendor/github.com/golang/glog:go_default_library",
-        "//vendor/golang.org/x/net/context:go_default_library",
         "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@@ -40,10 +40,9 @@ go_test(
     embed = [":go_default_library"],
     deps = [
         "//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
+        "//pkg/kubelet/checkpointmanager:go_default_library",
         "//pkg/kubelet/lifecycle:go_default_library",
-        "//pkg/kubelet/util/store:go_default_library",
-        "//pkg/scheduler/schedulercache:go_default_library",
-        "//pkg/util/filesystem:go_default_library",
+        "//pkg/scheduler/cache:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
         "//vendor/github.com/stretchr/testify/require:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
@@ -63,7 +62,10 @@ filegroup(

 filegroup(
     name = "all-srcs",
-    srcs = [":package-srcs"],
+    srcs = [
+        ":package-srcs",
+        "//pkg/kubelet/cm/devicemanager/checkpoint:all-srcs",
+    ],
     tags = ["automanaged"],
     visibility = ["//visibility:public"],
 )

vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint/BUILD (generated, vendored, new file, 26 lines)

@@ -0,0 +1,26 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["checkpoint.go"],
+    importpath = "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/kubelet/checkpointmanager:go_default_library",
+        "//pkg/kubelet/checkpointmanager/checksum:go_default_library",
+    ],
+)
+
+filegroup(
+    name = "package-srcs",
+    srcs = glob(["**"]),
+    tags = ["automanaged"],
+    visibility = ["//visibility:private"],
+)
+
+filegroup(
+    name = "all-srcs",
+    srcs = [":package-srcs"],
+    tags = ["automanaged"],
+    visibility = ["//visibility:public"],
+)

vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint/checkpoint.go (generated, vendored, new file, 81 lines)

@@ -0,0 +1,81 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package checkpoint
+
+import (
+	"encoding/json"
+
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
+)
+
+type DeviceManagerCheckpoint interface {
+	checkpointmanager.Checkpoint
+	GetData() ([]PodDevicesEntry, map[string][]string)
+}
+
+type PodDevicesEntry struct {
+	PodUID        string
+	ContainerName string
+	ResourceName  string
+	DeviceIDs     []string
+	AllocResp     []byte
+}
+
+// checkpointData struct is used to store pod to device allocation information
+// in a checkpoint file.
+// TODO: add version control when we need to change checkpoint format.
+type checkpointData struct {
+	PodDeviceEntries  []PodDevicesEntry
+	RegisteredDevices map[string][]string
+}
+
+type Data struct {
+	Data     checkpointData
+	Checksum checksum.Checksum
+}
+
+// NewDeviceManagerCheckpoint returns an instance of Checkpoint
+func New(devEntries []PodDevicesEntry,
+	devices map[string][]string) DeviceManagerCheckpoint {
+	return &Data{
+		Data: checkpointData{
+			PodDeviceEntries:  devEntries,
+			RegisteredDevices: devices,
+		},
+	}
+}
+
+// MarshalCheckpoint returns marshalled data
+func (cp *Data) MarshalCheckpoint() ([]byte, error) {
+	cp.Checksum = checksum.New(cp.Data)
+	return json.Marshal(*cp)
+}
+
+// UnmarshalCheckpoint returns unmarshalled data
+func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
+	return json.Unmarshal(blob, cp)
+}
+
+// VerifyChecksum verifies that passed checksum is same as calculated checksum
+func (cp *Data) VerifyChecksum() error {
+	return cp.Checksum.Verify(cp.Data)
+}
+
+func (cp *Data) GetData() ([]PodDevicesEntry, map[string][]string) {
+	return cp.Data.PodDeviceEntries, cp.Data.RegisteredDevices
+}

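Note on the new checkpoint API above: the intended round trip is easiest to see in isolation. The following minimal sketch drives only the exported names from the file just added (New, MarshalCheckpoint, UnmarshalCheckpoint, VerifyChecksum, GetData); the wrapper program and the sample pod/device values are illustrative, not part of the diff.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
)

func main() {
	// One pod/container/resource allocation plus the registered device IDs
	// (sample values, assumed for illustration).
	entries := []checkpoint.PodDevicesEntry{{
		PodUID:        "pod-uid-1",
		ContainerName: "con1",
		ResourceName:  "domain1.com/resource1",
		DeviceIDs:     []string{"dev1", "dev2"},
	}}
	registered := map[string][]string{"domain1.com/resource1": {"dev1", "dev2", "dev3"}}

	// MarshalCheckpoint stamps the checksum into Data before serializing.
	cp := checkpoint.New(entries, registered)
	blob, err := cp.MarshalCheckpoint()
	if err != nil {
		panic(err)
	}

	// Restore into a fresh checkpoint and verify integrity before trusting it.
	restored := checkpoint.New([]checkpoint.PodDevicesEntry{}, map[string][]string{})
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		panic(err)
	}
	if err := restored.VerifyChecksum(); err != nil {
		panic(err)
	}
	podDevs, regDevs := restored.GetData()
	fmt.Printf("restored %d pod entries, %d resources\n", len(podDevs), len(regDevs))
}

Because the checksum is computed over Data at marshal time, a later VerifyChecksum on the restored copy catches on-disk corruption before the device manager consumes the entries.
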
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/device_plugin_stub.go (generated, vendored, 30 lines changed)

@@ -17,13 +17,14 @@ limitations under the License.
 package devicemanager

 import (
+	"context"
 	"log"
 	"net"
 	"os"
 	"path"
+	"sync"
 	"time"

-	"golang.org/x/net/context"
 	"google.golang.org/grpc"

 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
@@ -35,6 +36,7 @@ type Stub struct {
 	socket string

 	stop   chan interface{}
+	wg     sync.WaitGroup
 	update chan []*pluginapi.Device

 	server *grpc.Server
@@ -70,7 +72,8 @@ func (m *Stub) SetAllocFunc(f stubAllocFunc) {
 	m.allocFunc = f
 }

-// Start starts the gRPC server of the device plugin
+// Start starts the gRPC server of the device plugin. Can only
+// be called once.
 func (m *Stub) Start() error {
 	err := m.cleanup()
 	if err != nil {
@@ -82,10 +85,14 @@ func (m *Stub) Start() error {
 		return err
 	}

+	m.wg.Add(1)
 	m.server = grpc.NewServer([]grpc.ServerOption{}...)
 	pluginapi.RegisterDevicePluginServer(m.server, m)

-	go m.server.Serve(sock)
+	go func() {
+		defer m.wg.Done()
+		m.server.Serve(sock)
+	}()
 	_, conn, err := dial(m.socket)
 	if err != nil {
 		return err
@@ -96,18 +103,27 @@ func (m *Stub) Start() error {
 	return nil
 }

-// Stop stops the gRPC server
+// Stop stops the gRPC server. Can be called without a prior Start
+// and more than once. Not safe to be called concurrently by different
+// goroutines!
 func (m *Stub) Stop() error {
+	if m.server == nil {
+		return nil
+	}
 	m.server.Stop()
-	close(m.stop)
+	m.wg.Wait()
+	m.server = nil
+	close(m.stop) // This prevents re-starting the server.

 	return m.cleanup()
 }

 // Register registers the device plugin for the given resourceName with Kubelet.
 func (m *Stub) Register(kubeletEndpoint, resourceName string, preStartContainerFlag bool) error {
-	conn, err := grpc.Dial(kubeletEndpoint, grpc.WithInsecure(), grpc.WithBlock(),
-		grpc.WithTimeout(10*time.Second),
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	conn, err := grpc.DialContext(ctx, kubeletEndpoint, grpc.WithInsecure(), grpc.WithBlock(),
 		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
 			return net.DialTimeout("unix", addr, timeout)
 		}))

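The Start/Stop changes above amount to one lifecycle idiom: track the Serve goroutine with a WaitGroup so Stop can block until the server has actually exited, and nil out the server handle so Stop tolerates being called before Start or more than once. Here is that idiom as a standalone sketch; the type name and the TCP listener (standing in for the plugin's unix socket) are assumptions for illustration.

package main

import (
	"fmt"
	"net"
	"sync"

	"google.golang.org/grpc"
)

type lifecycle struct {
	server *grpc.Server
	wg     sync.WaitGroup
}

func (l *lifecycle) start(lis net.Listener) {
	l.server = grpc.NewServer()
	l.wg.Add(1)
	go func() {
		defer l.wg.Done()
		_ = l.server.Serve(lis) // returns once Stop is called
	}()
}

func (l *lifecycle) stop() {
	if l.server == nil {
		return // no prior start, or already stopped: no-op
	}
	l.server.Stop()
	l.wg.Wait() // Serve has returned; no goroutine is leaked
	l.server = nil
}

func main() {
	lis, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	l := &lifecycle{}
	l.start(lis)
	l.stop()
	l.stop() // second call is safely ignored
	fmt.Println("stopped cleanly")
}
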
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/endpoint.go (generated, vendored, 54 lines changed)

@@ -17,13 +17,13 @@ limitations under the License.
 package devicemanager

 import (
+	"context"
 	"fmt"
 	"net"
 	"sync"
 	"time"

 	"github.com/golang/glog"
-	"golang.org/x/net/context"
 	"google.golang.org/grpc"

 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
@@ -39,6 +39,8 @@ type endpoint interface {
 	preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error)
 	getDevices() []pluginapi.Device
 	callback(resourceName string, added, updated, deleted []pluginapi.Device)
+	isStopped() bool
+	stopGracePeriodExpired() bool
 }

 type endpointImpl struct {
@@ -47,6 +49,7 @@ type endpointImpl struct {

 	socketPath   string
 	resourceName string
+	stopTime     time.Time

 	devices map[string]pluginapi.Device
 	mutex   sync.Mutex
@@ -55,6 +58,7 @@ type endpointImpl struct {
 }

 // newEndpoint creates a new endpoint for the given resourceName.
+// This is to be used during normal device plugin registration.
 func newEndpointImpl(socketPath, resourceName string, devices map[string]pluginapi.Device, callback monitorCallback) (*endpointImpl, error) {
 	client, c, err := dial(socketPath)
 	if err != nil {
@@ -74,6 +78,16 @@ func newEndpointImpl(socketPath, resourceName string, devices map[string]plugina
 	}, nil
 }

+// newStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set.
+// This is to be used during Kubelet restart, before the actual device plugin re-registers.
+func newStoppedEndpointImpl(resourceName string, devices map[string]pluginapi.Device) *endpointImpl {
+	return &endpointImpl{
+		resourceName: resourceName,
+		devices:      devices,
+		stopTime:     time.Now(),
+	}
+}
+
 func (e *endpointImpl) callback(resourceName string, added, updated, deleted []pluginapi.Device) {
 	e.cb(resourceName, added, updated, deleted)
 }
@@ -176,8 +190,30 @@ func (e *endpointImpl) run() {
 	}
 }

+func (e *endpointImpl) isStopped() bool {
+	e.mutex.Lock()
+	defer e.mutex.Unlock()
+	return !e.stopTime.IsZero()
+}
+
+func (e *endpointImpl) stopGracePeriodExpired() bool {
+	e.mutex.Lock()
+	defer e.mutex.Unlock()
+	return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
+}
+
+// used for testing only
+func (e *endpointImpl) setStopTime(t time.Time) {
+	e.mutex.Lock()
+	defer e.mutex.Unlock()
+	e.stopTime = t
+}
+
 // allocate issues Allocate gRPC call to the device plugin.
 func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
+	if e.isStopped() {
+		return nil, fmt.Errorf(errEndpointStopped, e)
+	}
 	return e.client.Allocate(context.Background(), &pluginapi.AllocateRequest{
 		ContainerRequests: []*pluginapi.ContainerAllocateRequest{
 			{DevicesIDs: devs},
@@ -187,6 +223,9 @@ func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, err

 // preStartContainer issues PreStartContainer gRPC call to the device plugin.
 func (e *endpointImpl) preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error) {
+	if e.isStopped() {
+		return nil, fmt.Errorf(errEndpointStopped, e)
+	}
 	ctx, cancel := context.WithTimeout(context.Background(), pluginapi.KubeletPreStartContainerRPCTimeoutInSecs*time.Second)
 	defer cancel()
 	return e.client.PreStartContainer(ctx, &pluginapi.PreStartContainerRequest{
@@ -195,13 +234,20 @@ func (e *endpointImpl) preStartContainer(devs []string) (*pluginapi.PreStartCont
 }

 func (e *endpointImpl) stop() {
-	e.clientConn.Close()
+	e.mutex.Lock()
+	defer e.mutex.Unlock()
+	if e.clientConn != nil {
+		e.clientConn.Close()
+	}
+	e.stopTime = time.Now()
 }

 // dial establishes the gRPC communication with the registered device plugin. https://godoc.org/google.golang.org/grpc#Dial
 func dial(unixSocketPath string) (pluginapi.DevicePluginClient, *grpc.ClientConn, error) {
-	c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
-		grpc.WithTimeout(10*time.Second),
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	c, err := grpc.DialContext(ctx, unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
 		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
 			return net.DialTimeout("unix", addr, timeout)
 		}),

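The stopTime field introduced above encodes a three-state endpoint lifecycle: running (zero stopTime), stopped but inside the grace period, and stopped with the grace period expired. A minimal rendering of just that state machine follows; the mutex is omitted for brevity and the constant is copied from the types.go change later in this diff.

package main

import (
	"fmt"
	"time"
)

// Same value the diff adds to types.go.
const endpointStopGracePeriod = time.Duration(5) * time.Minute

type ep struct{ stopTime time.Time }

func (e *ep) isStopped() bool { return !e.stopTime.IsZero() }

func (e *ep) stopGracePeriodExpired() bool {
	return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
}

func main() {
	e := &ep{}
	fmt.Println(e.isStopped(), e.stopGracePeriodExpired()) // false false: running

	e.stopTime = time.Now()
	fmt.Println(e.isStopped(), e.stopGracePeriodExpired()) // true false: in grace period

	e.stopTime = time.Now().Add(-endpointStopGracePeriod - time.Minute)
	fmt.Println(e.isStopped(), e.stopGracePeriodExpired()) // true true: expired
}

While an endpoint is in the middle state, allocate and preStartContainer fail fast with errEndpointStopped; its devices are marked unhealthy, so capacity still reports them while allocatable drops to zero, and only after the grace period does GetCapacity delete the resource outright.
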
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/manager.go (generated, vendored, 150 lines changed)

@@ -17,7 +17,7 @@ limitations under the License.
 package devicemanager

 import (
-	"encoding/json"
+	"context"
 	"fmt"
 	"net"
 	"os"
@@ -26,7 +26,6 @@ import (
 	"time"

 	"github.com/golang/glog"
-	"golang.org/x/net/context"
 	"google.golang.org/grpc"

 	"k8s.io/api/core/v1"
@@ -34,12 +33,13 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
+	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 	"k8s.io/kubernetes/pkg/kubelet/metrics"
-	utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
-	"k8s.io/kubernetes/pkg/scheduler/schedulercache"
-	utilfs "k8s.io/kubernetes/pkg/util/filesystem"
+	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
 )

 // ActivePodsFunc is a function that returns a list of pods to reconcile.
@@ -59,6 +59,7 @@ type ManagerImpl struct {
 	mutex sync.Mutex

 	server *grpc.Server
+	wg     sync.WaitGroup

 	// activePods is a method for listing active pods on the node
 	// so the amount of pluginResources requested by existing pods
@@ -83,9 +84,9 @@ type ManagerImpl struct {
 	allocatedDevices map[string]sets.String

 	// podDevices contains pod to allocated device mapping.
-	podDevices podDevices
-	store      utilstore.Store
-	pluginOpts map[string]*pluginapi.DevicePluginOptions
+	podDevices        podDevices
+	pluginOpts        map[string]*pluginapi.DevicePluginOptions
+	checkpointManager checkpointmanager.CheckpointManager
 }

 type sourcesReadyStub struct{}
@@ -122,11 +123,11 @@ func newManagerImpl(socketPath string) (*ManagerImpl, error) {
 	// Before that, initializes them to perform no-op operations.
 	manager.activePods = func() []*v1.Pod { return []*v1.Pod{} }
 	manager.sourcesReady = &sourcesReadyStub{}
-	var err error
-	manager.store, err = utilstore.NewFileStore(dir, utilfs.DefaultFs{})
+	checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
 	if err != nil {
-		return nil, fmt.Errorf("failed to initialize device plugin checkpointing store: %+v", err)
+		return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
 	}
+	manager.checkpointManager = checkpointManager

 	return manager, nil
 }
@@ -188,11 +189,6 @@ func (m *ManagerImpl) removeContents(dir string) error {
 	return nil
 }

-const (
-	// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
-	kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"
-)
-
 // checkpointFile returns device plugin checkpoint file path.
 func (m *ManagerImpl) checkpointFile() string {
 	return filepath.Join(m.socketdir, kubeletDeviceManagerCheckpoint)
@@ -229,10 +225,14 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
 		return err
 	}

+	m.wg.Add(1)
 	m.server = grpc.NewServer([]grpc.ServerOption{}...)

 	pluginapi.RegisterRegistrationServer(m.server, m)
-	go m.server.Serve(s)
+	go func() {
+		defer m.wg.Done()
+		m.server.Serve(s)
+	}()

 	glog.V(2).Infof("Serving device plugin registration server on %q", socketPath)

@@ -318,6 +318,8 @@ func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest
 }

 // Stop is the function that can stop the gRPC server.
+// Can be called concurrently, more than once, and is safe to call
+// without a prior Start.
 func (m *ManagerImpl) Stop() error {
 	m.mutex.Lock()
 	defer m.mutex.Unlock()
@@ -325,7 +327,12 @@ func (m *ManagerImpl) Stop() error {
 		e.stop()
 	}

+	if m.server == nil {
+		return nil
+	}
 	m.server.Stop()
+	m.wg.Wait()
+	m.server = nil
 	return nil
 }

@@ -338,6 +345,7 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
 		// to avoid potential orphaned devices upon re-registration
 		devices := make(map[string]pluginapi.Device)
 		for _, device := range old.getDevices() {
+			device.Health = pluginapi.Unhealthy
 			devices[device.ID] = device
 		}
 		existingDevs = devices
@@ -377,18 +385,28 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
 	go func() {
 		e.run()
 		e.stop()
+
 		m.mutex.Lock()
 		if old, ok := m.endpoints[r.ResourceName]; ok && old == e {
-			glog.V(2).Infof("Delete resource for endpoint %v", e)
-			delete(m.endpoints, r.ResourceName)
+			m.markResourceUnhealthy(r.ResourceName)
 		}
+
 		glog.V(2).Infof("Unregistered endpoint %v", e)
 		m.mutex.Unlock()
 	}()
 }

+func (m *ManagerImpl) markResourceUnhealthy(resourceName string) {
+	glog.V(2).Infof("Mark all resources Unhealthy for resource %s", resourceName)
+	healthyDevices := sets.NewString()
+	if _, ok := m.healthyDevices[resourceName]; ok {
+		healthyDevices = m.healthyDevices[resourceName]
+		m.healthyDevices[resourceName] = sets.NewString()
+	}
+	if _, ok := m.unhealthyDevices[resourceName]; !ok {
+		m.unhealthyDevices[resourceName] = sets.NewString()
+	}
+	m.unhealthyDevices[resourceName] = m.unhealthyDevices[resourceName].Union(healthyDevices)
+}
+
 // GetCapacity is expected to be called when Kubelet updates its node status.
 // The first returned variable contains the registered device plugin resource capacity.
 // The second returned variable contains the registered device plugin resource allocatable.
@@ -405,12 +423,20 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
 	needsUpdateCheckpoint := false
 	var capacity = v1.ResourceList{}
 	var allocatable = v1.ResourceList{}
-	var deletedResources []string
+	deletedResources := sets.NewString()
 	m.mutex.Lock()
 	for resourceName, devices := range m.healthyDevices {
-		if _, ok := m.endpoints[resourceName]; !ok {
+		e, ok := m.endpoints[resourceName]
+		if (ok && e.stopGracePeriodExpired()) || !ok {
+			// The resources contained in endpoints and (un)healthyDevices
+			// should always be consistent. Otherwise, we run with the risk
+			// of failing to garbage collect non-existing resources or devices.
+			if !ok {
+				glog.Errorf("unexpected: healthyDevices and endpoints are out of sync")
+			}
+			delete(m.endpoints, resourceName)
 			delete(m.healthyDevices, resourceName)
-			deletedResources = append(deletedResources, resourceName)
+			deletedResources.Insert(resourceName)
 			needsUpdateCheckpoint = true
 		} else {
 			capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
@@ -418,17 +444,14 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
 		}
 	}
 	for resourceName, devices := range m.unhealthyDevices {
-		if _, ok := m.endpoints[resourceName]; !ok {
+		e, ok := m.endpoints[resourceName]
+		if (ok && e.stopGracePeriodExpired()) || !ok {
+			if !ok {
+				glog.Errorf("unexpected: unhealthyDevices and endpoints are out of sync")
+			}
+			delete(m.endpoints, resourceName)
 			delete(m.unhealthyDevices, resourceName)
-			alreadyDeleted := false
-			for _, name := range deletedResources {
-				if name == resourceName {
-					alreadyDeleted = true
-				}
-			}
-			if !alreadyDeleted {
-				deletedResources = append(deletedResources, resourceName)
-			}
+			deletedResources.Insert(resourceName)
 			needsUpdateCheckpoint = true
 		} else {
 			capacityCount := capacity[v1.ResourceName(resourceName)]
@@ -441,36 +464,22 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
 	if needsUpdateCheckpoint {
 		m.writeCheckpoint()
 	}
-	return capacity, allocatable, deletedResources
-}
-
-// checkpointData struct is used to store pod to device allocation information
-// and registered device information in a checkpoint file.
-// TODO: add version control when we need to change checkpoint format.
-type checkpointData struct {
-	PodDeviceEntries  []podDevicesCheckpointEntry
-	RegisteredDevices map[string][]string
+	return capacity, allocatable, deletedResources.UnsortedList()
 }

 // Checkpoints device to container allocation information to disk.
 func (m *ManagerImpl) writeCheckpoint() error {
 	m.mutex.Lock()
-	data := checkpointData{
-		PodDeviceEntries:  m.podDevices.toCheckpointData(),
-		RegisteredDevices: make(map[string][]string),
-	}
+	registeredDevs := make(map[string][]string)
 	for resource, devices := range m.healthyDevices {
-		data.RegisteredDevices[resource] = devices.UnsortedList()
+		registeredDevs[resource] = devices.UnsortedList()
 	}
+	data := checkpoint.New(m.podDevices.toCheckpointData(),
+		registeredDevs)
 	m.mutex.Unlock()

-	dataJSON, err := json.Marshal(data)
+	err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
 	if err != nil {
-		return err
-	}
-	err = m.store.Write(kubeletDeviceManagerCheckpoint, dataJSON)
-	if err != nil {
-		return fmt.Errorf("failed to write deviceplugin checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
+		return fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
 	}
 	return nil
 }
@@ -478,29 +487,28 @@ func (m *ManagerImpl) writeCheckpoint() error {
 // Reads device to container allocation information from disk, and populates
 // m.allocatedDevices accordingly.
 func (m *ManagerImpl) readCheckpoint() error {
-	content, err := m.store.Read(kubeletDeviceManagerCheckpoint)
+	registeredDevs := make(map[string][]string)
+	devEntries := make([]checkpoint.PodDevicesEntry, 0)
+	cp := checkpoint.New(devEntries, registeredDevs)
+	err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
 	if err != nil {
-		if err == utilstore.ErrKeyNotFound {
+		if err == errors.ErrCheckpointNotFound {
 			glog.Warningf("Failed to retrieve checkpoint for %q: %v", kubeletDeviceManagerCheckpoint, err)
 			return nil
 		}
-		return fmt.Errorf("failed to read checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
+		return err
 	}
-	glog.V(4).Infof("Read checkpoint file %s\n", kubeletDeviceManagerCheckpoint)
-	var data checkpointData
-	if err := json.Unmarshal(content, &data); err != nil {
-		return fmt.Errorf("failed to unmarshal deviceplugin checkpoint data: %v", err)
-	}
-
 	m.mutex.Lock()
 	defer m.mutex.Unlock()
-	m.podDevices.fromCheckpointData(data.PodDeviceEntries)
+	podDevices, registeredDevs := cp.GetData()
+	m.podDevices.fromCheckpointData(podDevices)
 	m.allocatedDevices = m.podDevices.devices()
-	for resource, devices := range data.RegisteredDevices {
+	// TODO: Support Checkpointing for unhealthy devices as well
+	for resource := range registeredDevs {
 		// During start up, creates empty healthyDevices list so that the resource capacity
 		// will stay zero till the corresponding device plugin re-registers.
 		m.healthyDevices[resource] = sets.NewString()
-		for _, dev := range devices {
-			m.healthyDevices[resource].Insert(dev)
-		}
 		m.unhealthyDevices[resource] = sets.NewString()
+		m.endpoints[resource] = newStoppedEndpointImpl(resource, make(map[string]pluginapi.Device))
 	}
 	return nil
 }
@@ -688,6 +696,8 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
 	return m.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name), nil
 }

+// callPreStartContainerIfNeeded issues PreStartContainer grpc call for device plugin resource
+// with PreStartRequired option set.
 func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource string) error {
 	m.mutex.Lock()
 	opts, ok := m.pluginOpts[resource]

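With these changes, writeCheckpoint and readCheckpoint delegate all file handling to the checkpointmanager package. The same round trip in miniature, using only calls that appear in the diff (CreateCheckpoint, GetCheckpoint, errors.ErrCheckpointNotFound); the temp directory and the sample resource name are assumptions.

package main

import (
	"fmt"
	"io/ioutil"

	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
)

func main() {
	dir, err := ioutil.TempDir("", "ckpt")
	if err != nil {
		panic(err)
	}
	ckm, err := checkpointmanager.NewCheckpointManager(dir)
	if err != nil {
		panic(err)
	}

	// Write path: same shape as ManagerImpl.writeCheckpoint.
	data := checkpoint.New([]checkpoint.PodDevicesEntry{},
		map[string][]string{"domain1.com/resource1": {"dev1", "dev2"}})
	if err := ckm.CreateCheckpoint("kubelet_internal_checkpoint", data); err != nil {
		panic(err)
	}

	// Read path: same shape as ManagerImpl.readCheckpoint, including the
	// tolerance for a missing checkpoint on first boot.
	cp := checkpoint.New([]checkpoint.PodDevicesEntry{}, map[string][]string{})
	if err := ckm.GetCheckpoint("kubelet_internal_checkpoint", cp); err != nil {
		if err == errors.ErrCheckpointNotFound {
			fmt.Println("no checkpoint yet; starting fresh")
			return
		}
		panic(err)
	}
	_, registeredDevs := cp.GetData()
	fmt.Println("restored resources:", registeredDevs)
}

Note what readCheckpoint does with the restored resources: it recreates each one as a stopped endpoint via newStoppedEndpointImpl, so after a kubelet restart the resource is still recognized (with zero capacity) until the plugin re-registers.
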
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/manager_stub.go (generated, vendored, 2 lines changed)

@@ -21,7 +21,7 @@ import (
 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
-	"k8s.io/kubernetes/pkg/scheduler/schedulercache"
+	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
 )

 // ManagerStub provides a simple stub implementation for the Device Manager.

vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/manager_test.go (generated, vendored, 157 lines changed)

@@ -17,7 +17,6 @@ limitations under the License.
 package devicemanager

 import (
-	"flag"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -34,10 +33,9 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
-	utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
-	"k8s.io/kubernetes/pkg/scheduler/schedulercache"
-	utilfs "k8s.io/kubernetes/pkg/util/filesystem"
+	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
 )

 const (
@@ -69,6 +67,29 @@ func TestNewManagerImplStart(t *testing.T) {
 	defer os.RemoveAll(socketDir)
 	m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {}, socketName, pluginSocketName)
 	cleanup(t, m, p)
+	// Stop should tolerate being called more than once.
+	cleanup(t, m, p)
+}
+
+func TestNewManagerImplStop(t *testing.T) {
+	socketDir, socketName, pluginSocketName, err := tmpSocketDir()
+	require.NoError(t, err)
+	defer os.RemoveAll(socketDir)
+
+	m, err := newManagerImpl(socketName)
+	require.NoError(t, err)
+	// No prior Start, but that should be okay.
+	err = m.Stop()
+	require.NoError(t, err)
+
+	devs := []*pluginapi.Device{
+		{ID: "Dev1", Health: pluginapi.Healthy},
+		{ID: "Dev2", Health: pluginapi.Healthy},
+	}
+	p := NewDevicePluginStub(devs, pluginSocketName)
+	// Same here.
+	err = p.Stop()
+	require.NoError(t, err)
 }

 // Tests that the device plugin manager correctly handles registration and re-registration by
@@ -192,7 +213,8 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
 	// Adds three devices for resource1, two healthy and one unhealthy.
 	// Expects capacity for resource1 to be 2.
 	resourceName1 := "domain1.com/resource1"
-	testManager.endpoints[resourceName1] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
+	e1 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
+	testManager.endpoints[resourceName1] = e1
 	callback(resourceName1, devs, []pluginapi.Device{}, []pluginapi.Device{})
 	capacity, allocatable, removedResources := testManager.GetCapacity()
 	resource1Capacity, ok := capacity[v1.ResourceName(resourceName1)]
@@ -240,7 +262,8 @@ func TestUpdateCapacityAllocatable(t *testing.T) {

 	// Tests adding another resource.
 	resourceName2 := "resource2"
-	testManager.endpoints[resourceName2] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
+	e2 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
+	testManager.endpoints[resourceName2] = e2
 	callback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
 	capacity, allocatable, removedResources = testManager.GetCapacity()
 	as.Equal(2, len(capacity))
@@ -252,9 +275,9 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
 	as.Equal(int64(2), resource2Allocatable.Value())
 	as.Equal(0, len(removedResources))

-	// Removes resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
+	// Expires resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
 	// is removed from capacity and it no longer exists in healthyDevices after the call.
-	delete(testManager.endpoints, resourceName1)
+	e1.setStopTime(time.Now().Add(-1*endpointStopGracePeriod - time.Duration(10)*time.Second))
 	capacity, allocatable, removed := testManager.GetCapacity()
 	as.Equal([]string{resourceName1}, removed)
 	_, ok = capacity[v1.ResourceName(resourceName1)]
@@ -266,9 +289,49 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
 	as.False(ok)
 	_, ok = testManager.unhealthyDevices[resourceName1]
 	as.False(ok)
+	fmt.Println("removed: ", removed)
+	as.Equal(1, len(removed))
+	_, ok = testManager.endpoints[resourceName1]
+	as.False(ok)
+	as.Equal(1, len(testManager.endpoints))
+
+	// Stops resourceName2 endpoint. Verifies its stopTime is set, allocate and
+	// preStartContainer calls return errors.
+	e2.stop()
+	as.False(e2.stopTime.IsZero())
+	_, err = e2.allocate([]string{"Device1"})
+	reflect.DeepEqual(err, fmt.Errorf(errEndpointStopped, e2))
+	_, err = e2.preStartContainer([]string{"Device1"})
+	reflect.DeepEqual(err, fmt.Errorf(errEndpointStopped, e2))
+	// Marks resourceName2 unhealthy and verifies its capacity/allocatable are
+	// correctly updated.
+	testManager.markResourceUnhealthy(resourceName2)
+	capacity, allocatable, removed = testManager.GetCapacity()
+	val, ok = capacity[v1.ResourceName(resourceName2)]
+	as.True(ok)
+	as.Equal(int64(3), val.Value())
+	val, ok = allocatable[v1.ResourceName(resourceName2)]
+	as.True(ok)
+	as.Equal(int64(0), val.Value())
+	as.Empty(removed)
+	// Writes and re-reads checkpoints. Verifies we create a stopped endpoint
+	// for resourceName2, its capacity is set to zero, and we still consider
+	// it as a DevicePlugin resource. This makes sure any pod that was scheduled
+	// during the time of propagating capacity change to the scheduler will be
+	// properly rejected instead of being incorrectly started.
+	err = testManager.writeCheckpoint()
+	as.Nil(err)
+	testManager.healthyDevices = make(map[string]sets.String)
+	testManager.unhealthyDevices = make(map[string]sets.String)
+	err = testManager.readCheckpoint()
+	as.Nil(err)
+	as.Equal(1, len(testManager.endpoints))
+	_, ok = testManager.endpoints[resourceName2]
+	as.True(ok)
+	capacity, allocatable, removed = testManager.GetCapacity()
+	val, ok = capacity[v1.ResourceName(resourceName2)]
+	as.True(ok)
+	as.Equal(int64(0), val.Value())
+	as.Empty(removed)
+	as.True(testManager.isDevicePluginResource(resourceName2))
 }

 func constructDevices(devices []string) sets.String {
@@ -305,18 +368,19 @@ func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.Cont
 func TestCheckpoint(t *testing.T) {
 	resourceName1 := "domain1.com/resource1"
 	resourceName2 := "domain2.com/resource2"

 	as := assert.New(t)
 	tmpDir, err := ioutil.TempDir("", "checkpoint")
 	as.Nil(err)
 	defer os.RemoveAll(tmpDir)
+	ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
+	as.Nil(err)
 	testManager := &ManagerImpl{
-		socketdir:        tmpDir,
-		healthyDevices:   make(map[string]sets.String),
-		allocatedDevices: make(map[string]sets.String),
-		podDevices:       make(podDevices),
+		endpoints:         make(map[string]endpoint),
+		healthyDevices:    make(map[string]sets.String),
+		unhealthyDevices:  make(map[string]sets.String),
+		allocatedDevices:  make(map[string]sets.String),
+		podDevices:        make(podDevices),
+		checkpointManager: ckm,
 	}
-	testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})

 	testManager.podDevices.insert("pod1", "con1", resourceName1,
 		constructDevices([]string{"dev1", "dev2"}),
@@ -414,6 +478,10 @@ func (m *MockEndpoint) allocate(devs []string) (*pluginapi.AllocateResponse, err
 	return nil, nil
 }

+func (m *MockEndpoint) isStopped() bool { return false }
+
+func (m *MockEndpoint) stopGracePeriodExpired() bool { return false }
+
 func makePod(limits v1.ResourceList) *v1.Pod {
 	return &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
@@ -431,20 +499,25 @@ func makePod(limits v1.ResourceList) *v1.Pod {
 	}
 }

-func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) *ManagerImpl {
+func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) (*ManagerImpl, error) {
 	monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
-	testManager := &ManagerImpl{
-		socketdir:        tmpDir,
-		callback:         monitorCallback,
-		healthyDevices:   make(map[string]sets.String),
-		allocatedDevices: make(map[string]sets.String),
-		endpoints:        make(map[string]endpoint),
-		pluginOpts:       opts,
-		podDevices:       make(podDevices),
-		activePods:       activePods,
-		sourcesReady:     &sourcesReadyStub{},
+	ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
+	if err != nil {
+		return nil, err
+	}
+	testManager := &ManagerImpl{
+		socketdir:         tmpDir,
+		callback:          monitorCallback,
+		healthyDevices:    make(map[string]sets.String),
+		unhealthyDevices:  make(map[string]sets.String),
+		allocatedDevices:  make(map[string]sets.String),
+		endpoints:         make(map[string]endpoint),
+		pluginOpts:        opts,
+		podDevices:        make(podDevices),
+		activePods:        activePods,
+		sourcesReady:      &sourcesReadyStub{},
+		checkpointManager: ckm,
 	}
-	testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
 	for _, res := range testRes {
 		testManager.healthyDevices[res.resourceName] = sets.NewString()
 		for _, dev := range res.devs {
@@ -476,7 +549,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 			}
 		}
 	}
-	return testManager
+	return testManager, nil
 }

 func getTestNodeInfo(allocatable v1.ResourceList) *schedulercache.NodeInfo {
@@ -497,7 +570,6 @@ type TestResource struct {
 }

 func TestPodContainerDeviceAllocation(t *testing.T) {
-	flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
 	res1 := TestResource{
 		resourceName:     "domain1.com/resource1",
 		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
@@ -520,7 +592,8 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
 	defer os.RemoveAll(tmpDir)
 	nodeInfo := getTestNodeInfo(v1.ResourceList{})
 	pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
-	testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
+	as.Nil(err)

 	testPods := []*v1.Pod{
 		makePod(v1.ResourceList{
@@ -615,7 +688,8 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 	as.Nil(err)
 	defer os.RemoveAll(tmpDir)
 	pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
-	testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
+	as.Nil(err)

 	podWithPluginResourcesInInitContainers := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
@@ -693,14 +767,18 @@ func TestSanitizeNodeAllocatable(t *testing.T) {

 	as := assert.New(t)
 	monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
+	tmpDir, err := ioutil.TempDir("", "checkpoint")
+	as.Nil(err)
+
+	ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
+	as.Nil(err)
 	testManager := &ManagerImpl{
-		callback:         monitorCallback,
-		healthyDevices:   make(map[string]sets.String),
-		allocatedDevices: make(map[string]sets.String),
-		podDevices:       make(podDevices),
+		callback:          monitorCallback,
+		allocatedDevices:  make(map[string]sets.String),
+		healthyDevices:    make(map[string]sets.String),
+		podDevices:        make(podDevices),
+		checkpointManager: ckm,
 	}
-	testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
 	// require one of resource1 and one of resource2
 	testManager.allocatedDevices[resourceName1] = sets.NewString()
 	testManager.allocatedDevices[resourceName1].Insert(devID1)
@@ -747,7 +825,8 @@ func TestDevicePreStartContainer(t *testing.T) {
 	pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
 	pluginOpts[res1.resourceName] = &pluginapi.DevicePluginOptions{PreStartRequired: true}

-	testManager := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
+	as.Nil(err)

 	ch := make(chan []string, 1)
 	testManager.endpoints[res1.resourceName] = &MockEndpoint{

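The capacity/allocatable assertions above follow one accounting rule in the new GetCapacity: capacity counts healthy plus unhealthy devices, while allocatable counts healthy devices only. The rule in isolation (a toy sketch with plain maps, not the manager's actual sets.String bookkeeping):

package main

import "fmt"

func main() {
	healthy := map[string][]string{"domain1.com/resource1": {"dev1", "dev2"}}
	unhealthy := map[string][]string{"domain1.com/resource1": {"dev3"}}

	capacity := map[string]int{}
	allocatable := map[string]int{}
	for res, devs := range healthy {
		capacity[res] += len(devs)
		allocatable[res] += len(devs)
	}
	for res, devs := range unhealthy {
		capacity[res] += len(devs) // unhealthy devices still occupy capacity
	}

	// With three devices, one unhealthy: capacity 3, allocatable 2,
	// matching the markResourceUnhealthy assertions above (3 and 0 once
	// every device is unhealthy).
	fmt.Println(capacity["domain1.com/resource1"], allocatable["domain1.com/resource1"])
}
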
vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/pod_devices.go (generated, vendored, 23 lines changed)

@@ -21,6 +21,7 @@ import (

 	"k8s.io/apimachinery/pkg/util/sets"
 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
+	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )

@@ -126,18 +127,9 @@ func (pdev podDevices) devices() map[string]sets.String {
 	return ret
 }

-// podDevicesCheckpointEntry is used to record <pod, container> to device allocation information.
-type podDevicesCheckpointEntry struct {
-	PodUID        string
-	ContainerName string
-	ResourceName  string
-	DeviceIDs     []string
-	AllocResp     []byte
-}
-
 // Turns podDevices to checkpointData.
-func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
-	var data []podDevicesCheckpointEntry
+func (pdev podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
+	var data []checkpoint.PodDevicesEntry
 	for podUID, containerDevices := range pdev {
 		for conName, resources := range containerDevices {
 			for resource, devices := range resources {
@@ -152,7 +144,12 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
 					glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
 					continue
 				}
-				data = append(data, podDevicesCheckpointEntry{podUID, conName, resource, devIds, allocResp})
+				data = append(data, checkpoint.PodDevicesEntry{
+					PodUID:        podUID,
+					ContainerName: conName,
+					ResourceName:  resource,
+					DeviceIDs:     devIds,
+					AllocResp:     allocResp})
 			}
 		}
 	}
@@ -160,7 +157,7 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
 }

 // Populates podDevices from the passed in checkpointData.
-func (pdev podDevices) fromCheckpointData(data []podDevicesCheckpointEntry) {
+func (pdev podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
 	for _, entry := range data {
 		glog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
 			entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)

vendor/k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/types.go (generated, vendored, 15 lines changed)

@@ -17,12 +17,14 @@ limitations under the License.
 package devicemanager

 import (
+	"time"
+
 	"k8s.io/api/core/v1"
 	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
-	"k8s.io/kubernetes/pkg/scheduler/schedulercache"
+	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
 )

 // Manager manages all the Device Plugins running on a node.
@@ -86,6 +88,8 @@ const (
 	errInvalidResourceName = "the ResourceName %q is invalid"
 	// errEmptyResourceName is the error raised when the resource name field is empty
 	errEmptyResourceName = "invalid Empty ResourceName"
+	// errEndpointStopped indicates that the endpoint has been stopped
+	errEndpointStopped = "endpoint %v has been stopped"

 	// errBadSocket is the error raised when the registry socket path is not absolute
 	errBadSocket = "bad socketPath, must be an absolute path:"
@@ -96,3 +100,12 @@ const (
 	// errListAndWatch is the error raised when ListAndWatch ended unsuccessfully
 	errListAndWatch = "listAndWatch ended unexpectedly for device plugin %s with error %v"
 )
+
+// endpointStopGracePeriod indicates the grace period after an endpoint is stopped
+// because its device plugin fails. DeviceManager keeps the stopped endpoint in its
+// cache during this grace period to cover the time gap for the capacity change to
+// take effect.
+const endpointStopGracePeriod = time.Duration(5) * time.Minute
+
+// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
+const kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"
|
Reference in New Issue
Block a user