Fresh dep ensure

This commit is contained in:
Mike Cronce
2018-11-26 13:23:56 -05:00
parent 93cb8a04d7
commit 407478ab9a
9016 changed files with 551394 additions and 279685 deletions

View File

@ -15,6 +15,8 @@ go_library(
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
"//pkg/kubelet/apis/pluginregistration/v1:go_default_library",
"//pkg/kubelet/apis/podresources/v1alpha1:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
"//pkg/kubelet/cm/devicemanager/checkpoint:go_default_library",
@ -22,12 +24,13 @@ go_library(
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/util/pluginwatcher:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
@ -40,16 +43,18 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
"//pkg/kubelet/apis/pluginregistration/v1:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/util/pluginwatcher:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
],
)

View File

@ -28,12 +28,15 @@ import (
"google.golang.org/grpc"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
watcherapi "k8s.io/kubernetes/pkg/kubelet/apis/pluginregistration/v1"
)
// Stub implementation for DevicePlugin.
type Stub struct {
devs []*pluginapi.Device
socket string
devs []*pluginapi.Device
socket string
resourceName string
preStartContainerFlag bool
stop chan interface{}
wg sync.WaitGroup
@ -43,6 +46,10 @@ type Stub struct {
// allocFunc is used for handling allocation request
allocFunc stubAllocFunc
registrationStatus chan watcherapi.RegistrationStatus // for testing
endpoint string // for testing
}
// stubAllocFunc is the function called when receiving an allocation request from Kubelet
@ -55,10 +62,12 @@ func defaultAllocFunc(r *pluginapi.AllocateRequest, devs map[string]pluginapi.De
}
// NewDevicePluginStub returns an initialized DevicePlugin Stub.
func NewDevicePluginStub(devs []*pluginapi.Device, socket string) *Stub {
func NewDevicePluginStub(devs []*pluginapi.Device, socket string, name string, preStartContainerFlag bool) *Stub {
return &Stub{
devs: devs,
socket: socket,
devs: devs,
socket: socket,
resourceName: name,
preStartContainerFlag: preStartContainerFlag,
stop: make(chan interface{}),
update: make(chan []*pluginapi.Device),
@ -88,6 +97,7 @@ func (m *Stub) Start() error {
m.wg.Add(1)
m.server = grpc.NewServer([]grpc.ServerOption{}...)
pluginapi.RegisterDevicePluginServer(m.server, m)
watcherapi.RegisterRegistrationServer(m.server, m)
go func() {
defer m.wg.Done()
@ -118,8 +128,36 @@ func (m *Stub) Stop() error {
return m.cleanup()
}
// GetInfo is the registration RPC that returns this stub's PluginInfo:
// a DevicePlugin entry named after the stub's resource name, served at the
// stub's endpoint, and advertising the device plugin API version.
func (m *Stub) GetInfo(ctx context.Context, req *watcherapi.InfoRequest) (*watcherapi.PluginInfo, error) {
	log.Println("GetInfo")

	info := new(watcherapi.PluginInfo)
	info.Type = watcherapi.DevicePlugin
	info.Name = m.resourceName
	info.Endpoint = m.endpoint
	info.SupportedVersions = []string{pluginapi.Version}
	return info, nil
}
// NotifyRegistrationStatus receives the registration notification from the
// watcher. When a registrationStatus channel has been set (test use), a copy
// of the status is forwarded on it before anything else happens; a failed
// registration is then logged. An empty response is always returned.
func (m *Stub) NotifyRegistrationStatus(ctx context.Context, status *watcherapi.RegistrationStatus) (*watcherapi.RegistrationStatusResponse, error) {
	if statusCh := m.registrationStatus; statusCh != nil {
		statusCh <- *status
	}

	if registered := status.PluginRegistered; !registered {
		log.Println("Registration failed: ", status.Error)
	}

	return new(watcherapi.RegistrationStatusResponse), nil
}
// Register registers the device plugin for the given resourceName with Kubelet.
func (m *Stub) Register(kubeletEndpoint, resourceName string, preStartContainerFlag bool) error {
func (m *Stub) Register(kubeletEndpoint, resourceName string, pluginSockDir string) error {
if pluginSockDir != "" {
if _, err := os.Stat(pluginSockDir + "DEPRECATION"); err == nil {
log.Println("Deprecation file found. Skip registration.")
return nil
}
}
log.Println("Deprecation file not found. Invoke registration")
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
@ -127,16 +165,16 @@ func (m *Stub) Register(kubeletEndpoint, resourceName string, preStartContainerF
grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("unix", addr, timeout)
}))
defer conn.Close()
if err != nil {
return err
}
defer conn.Close()
client := pluginapi.NewRegistrationClient(conn)
reqt := &pluginapi.RegisterRequest{
Version: pluginapi.Version,
Endpoint: path.Base(m.socket),
ResourceName: resourceName,
Options: &pluginapi.DevicePluginOptions{PreStartRequired: preStartContainerFlag},
Options: &pluginapi.DevicePluginOptions{PreStartRequired: m.preStartContainerFlag},
}
_, err = client.Register(context.Background(), reqt)
@ -148,7 +186,7 @@ func (m *Stub) Register(kubeletEndpoint, resourceName string, preStartContainerF
// GetDevicePluginOptions returns DevicePluginOptions settings for the device plugin.
func (m *Stub) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
return &pluginapi.DevicePluginOptions{}, nil
return &pluginapi.DevicePluginOptions{PreStartRequired: m.preStartContainerFlag}, nil
}
// PreStartContainer resets the devices received

View File

@ -23,8 +23,8 @@ import (
"sync"
"time"
"github.com/golang/glog"
"google.golang.org/grpc"
"k8s.io/klog"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
)
@ -37,8 +37,7 @@ type endpoint interface {
stop()
allocate(devs []string) (*pluginapi.AllocateResponse, error)
preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error)
getDevices() []pluginapi.Device
callback(resourceName string, added, updated, deleted []pluginapi.Device)
callback(resourceName string, devices []pluginapi.Device)
isStopped() bool
stopGracePeriodExpired() bool
}
@ -51,18 +50,16 @@ type endpointImpl struct {
resourceName string
stopTime time.Time
devices map[string]pluginapi.Device
mutex sync.Mutex
cb monitorCallback
mutex sync.Mutex
cb monitorCallback
}
// newEndpoint creates a new endpoint for the given resourceName.
// This is to be used during normal device plugin registration.
func newEndpointImpl(socketPath, resourceName string, devices map[string]pluginapi.Device, callback monitorCallback) (*endpointImpl, error) {
func newEndpointImpl(socketPath, resourceName string, callback monitorCallback) (*endpointImpl, error) {
client, c, err := dial(socketPath)
if err != nil {
glog.Errorf("Can't create new endpoint with path %s err %v", socketPath, err)
klog.Errorf("Can't create new endpoint with path %s err %v", socketPath, err)
return nil, err
}
@ -73,120 +70,52 @@ func newEndpointImpl(socketPath, resourceName string, devices map[string]plugina
socketPath: socketPath,
resourceName: resourceName,
devices: devices,
cb: callback,
cb: callback,
}, nil
}
// newStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set.
// This is to be used during Kubelet restart, before the actual device plugin re-registers.
func newStoppedEndpointImpl(resourceName string, devices map[string]pluginapi.Device) *endpointImpl {
func newStoppedEndpointImpl(resourceName string) *endpointImpl {
return &endpointImpl{
resourceName: resourceName,
devices: devices,
stopTime: time.Now(),
}
}
func (e *endpointImpl) callback(resourceName string, added, updated, deleted []pluginapi.Device) {
e.cb(resourceName, added, updated, deleted)
}
func (e *endpointImpl) getDevices() []pluginapi.Device {
e.mutex.Lock()
defer e.mutex.Unlock()
var devs []pluginapi.Device
for _, d := range e.devices {
devs = append(devs, d)
}
return devs
func (e *endpointImpl) callback(resourceName string, devices []pluginapi.Device) {
e.cb(resourceName, devices)
}
// run initializes ListAndWatch gRPC call for the device plugin and
// blocks on receiving ListAndWatch gRPC stream updates. Each ListAndWatch
// stream update contains a new list of device states. listAndWatch compares the new
// device states with its cached states to get list of new, updated, and deleted devices.
// stream update contains a new list of device states.
// It then issues a callback to pass this information to the device manager which
// will adjust the resource available information accordingly.
func (e *endpointImpl) run() {
stream, err := e.client.ListAndWatch(context.Background(), &pluginapi.Empty{})
if err != nil {
glog.Errorf(errListAndWatch, e.resourceName, err)
klog.Errorf(errListAndWatch, e.resourceName, err)
return
}
devices := make(map[string]pluginapi.Device)
e.mutex.Lock()
for _, d := range e.devices {
devices[d.ID] = d
}
e.mutex.Unlock()
for {
response, err := stream.Recv()
if err != nil {
glog.Errorf(errListAndWatch, e.resourceName, err)
klog.Errorf(errListAndWatch, e.resourceName, err)
return
}
devs := response.Devices
glog.V(2).Infof("State pushed for device plugin %s", e.resourceName)
newDevs := make(map[string]*pluginapi.Device)
var added, updated []pluginapi.Device
klog.V(2).Infof("State pushed for device plugin %s", e.resourceName)
var newDevs []pluginapi.Device
for _, d := range devs {
dOld, ok := devices[d.ID]
newDevs[d.ID] = d
if !ok {
glog.V(2).Infof("New device for Endpoint %s: %v", e.resourceName, d)
devices[d.ID] = *d
added = append(added, *d)
continue
}
if d.Health == dOld.Health {
continue
}
if d.Health == pluginapi.Unhealthy {
glog.Errorf("Device %s is now Unhealthy", d.ID)
} else if d.Health == pluginapi.Healthy {
glog.V(2).Infof("Device %s is now Healthy", d.ID)
}
devices[d.ID] = *d
updated = append(updated, *d)
newDevs = append(newDevs, *d)
}
var deleted []pluginapi.Device
for id, d := range devices {
if _, ok := newDevs[id]; ok {
continue
}
glog.Errorf("Device %s was deleted", d.ID)
deleted = append(deleted, d)
delete(devices, id)
}
e.mutex.Lock()
// NOTE: Return a copy of 'devices' instead of returning a direct reference to local 'devices'
e.devices = make(map[string]pluginapi.Device)
for _, d := range devices {
e.devices[d.ID] = d
}
e.mutex.Unlock()
e.callback(e.resourceName, added, updated, deleted)
e.callback(e.resourceName, newDevs)
}
}

View File

@ -37,7 +37,7 @@ func TestNewEndpoint(t *testing.T) {
{ID: "ADeviceId", Health: pluginapi.Healthy},
}
p, e := esetup(t, devs, socket, "mock", func(n string, a, u, r []pluginapi.Device) {})
p, e := esetup(t, devs, socket, "mock", func(n string, d []pluginapi.Device) {})
defer ecleanup(t, p, e)
}
@ -58,7 +58,7 @@ func TestRun(t *testing.T) {
callbackCount := 0
callbackChan := make(chan int)
callback := func(n string, a, u, r []pluginapi.Device) {
callback := func(n string, devices []pluginapi.Device) {
// Should be called twice:
// one for plugin registration, one for plugin update.
if callbackCount > 2 {
@ -67,23 +67,24 @@ func TestRun(t *testing.T) {
// Check plugin registration
if callbackCount == 0 {
require.Len(t, a, 3)
require.Len(t, u, 0)
require.Len(t, r, 0)
require.Len(t, devices, 3)
require.Equal(t, devices[0].ID, devs[0].ID)
require.Equal(t, devices[1].ID, devs[1].ID)
require.Equal(t, devices[2].ID, devs[2].ID)
require.Equal(t, devices[0].Health, devs[0].Health)
require.Equal(t, devices[1].Health, devs[1].Health)
require.Equal(t, devices[2].Health, devs[2].Health)
}
// Check plugin update
if callbackCount == 1 {
require.Len(t, a, 1)
require.Len(t, u, 2)
require.Len(t, r, 1)
require.Equal(t, a[0].ID, updated[2].ID)
require.Equal(t, u[0].ID, updated[0].ID)
require.Equal(t, u[0].Health, updated[0].Health)
require.Equal(t, u[1].ID, updated[1].ID)
require.Equal(t, u[1].Health, updated[1].Health)
require.Equal(t, r[0].ID, devs[1].ID)
require.Len(t, devices, 3)
require.Equal(t, devices[0].ID, updated[0].ID)
require.Equal(t, devices[1].ID, updated[1].ID)
require.Equal(t, devices[2].ID, updated[2].ID)
require.Equal(t, devices[0].Health, updated[0].Health)
require.Equal(t, devices[1].Health, updated[1].Health)
require.Equal(t, devices[2].Health, updated[2].Health)
}
callbackCount++
@ -102,18 +103,7 @@ func TestRun(t *testing.T) {
// Wait for the second callback to be issued.
<-callbackChan
e.mutex.Lock()
defer e.mutex.Unlock()
require.Len(t, e.devices, 3)
for _, dref := range updated {
d, ok := e.devices[dref.ID]
require.True(t, ok)
require.Equal(t, d.ID, dref.ID)
require.Equal(t, d.Health, dref.Health)
}
require.Equal(t, callbackCount, 2)
}
func TestAllocate(t *testing.T) {
@ -123,7 +113,7 @@ func TestAllocate(t *testing.T) {
}
callbackCount := 0
callbackChan := make(chan int)
p, e := esetup(t, devs, socket, "mock", func(n string, a, u, r []pluginapi.Device) {
p, e := esetup(t, devs, socket, "mock", func(n string, d []pluginapi.Device) {
callbackCount++
callbackChan <- callbackCount
})
@ -169,23 +159,13 @@ func TestAllocate(t *testing.T) {
require.Equal(t, resp, respOut)
}
func TestGetDevices(t *testing.T) {
e := endpointImpl{
devices: map[string]pluginapi.Device{
"ADeviceId": {ID: "ADeviceId", Health: pluginapi.Healthy},
},
}
devs := e.getDevices()
require.Len(t, devs, 1)
}
func esetup(t *testing.T, devs []*pluginapi.Device, socket, resourceName string, callback monitorCallback) (*Stub, *endpointImpl) {
p := NewDevicePluginStub(devs, socket)
p := NewDevicePluginStub(devs, socket, resourceName, false)
err := p.Start()
require.NoError(t, err)
e, err := newEndpointImpl(socket, resourceName, make(map[string]pluginapi.Device), callback)
e, err := newEndpointImpl(socket, resourceName, callback)
require.NoError(t, err)
return p, e

View File

@ -25,20 +25,22 @@ import (
"sync"
"time"
"github.com/golang/glog"
"google.golang.org/grpc"
"k8s.io/klog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
watcher "k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -48,14 +50,14 @@ type ActivePodsFunc func() []*v1.Pod
// monitorCallback is the function called when a device's health state changes,
// or new devices are reported, or old devices are deleted.
// devices contains the most recent state of every Device reported by the plugin.
type monitorCallback func(resourceName string, added, updated, deleted []pluginapi.Device)
type monitorCallback func(resourceName string, devices []pluginapi.Device)
// ManagerImpl is the structure in charge of managing Device Plugins.
type ManagerImpl struct {
socketname string
socketdir string
endpoints map[string]endpoint // Key is ResourceName
endpoints map[string]endpointInfo // Key is ResourceName
mutex sync.Mutex
server *grpc.Server
@ -85,10 +87,14 @@ type ManagerImpl struct {
// podDevices contains pod to allocated device mapping.
podDevices podDevices
pluginOpts map[string]*pluginapi.DevicePluginOptions
checkpointManager checkpointmanager.CheckpointManager
}
type endpointInfo struct {
e endpoint
opts *pluginapi.DevicePluginOptions
}
type sourcesReadyStub struct{}
func (s *sourcesReadyStub) AddSource(source string) {}
@ -100,21 +106,21 @@ func NewManagerImpl() (*ManagerImpl, error) {
}
func newManagerImpl(socketPath string) (*ManagerImpl, error) {
glog.V(2).Infof("Creating Device Plugin manager at %s", socketPath)
klog.V(2).Infof("Creating Device Plugin manager at %s", socketPath)
if socketPath == "" || !filepath.IsAbs(socketPath) {
return nil, fmt.Errorf(errBadSocket+" %v", socketPath)
return nil, fmt.Errorf(errBadSocket+" %s", socketPath)
}
dir, file := filepath.Split(socketPath)
manager := &ManagerImpl{
endpoints: make(map[string]endpoint),
endpoints: make(map[string]endpointInfo),
socketname: file,
socketdir: dir,
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
pluginOpts: make(map[string]*pluginapi.DevicePluginOptions),
podDevices: make(podDevices),
}
manager.callback = manager.genericDeviceUpdateCallback
@ -125,35 +131,24 @@ func newManagerImpl(socketPath string) (*ManagerImpl, error) {
manager.sourcesReady = &sourcesReadyStub{}
checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
if err != nil {
return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
}
manager.checkpointManager = checkpointManager
return manager, nil
}
func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, added, updated, deleted []pluginapi.Device) {
kept := append(updated, added...)
func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices []pluginapi.Device) {
m.mutex.Lock()
if _, ok := m.healthyDevices[resourceName]; !ok {
m.healthyDevices[resourceName] = sets.NewString()
}
if _, ok := m.unhealthyDevices[resourceName]; !ok {
m.unhealthyDevices[resourceName] = sets.NewString()
}
for _, dev := range kept {
m.healthyDevices[resourceName] = sets.NewString()
m.unhealthyDevices[resourceName] = sets.NewString()
for _, dev := range devices {
if dev.Health == pluginapi.Healthy {
m.healthyDevices[resourceName].Insert(dev.ID)
m.unhealthyDevices[resourceName].Delete(dev.ID)
} else {
m.unhealthyDevices[resourceName].Insert(dev.ID)
m.healthyDevices[resourceName].Delete(dev.ID)
}
}
for _, dev := range deleted {
m.healthyDevices[resourceName].Delete(dev.ID)
m.unhealthyDevices[resourceName].Delete(dev.ID)
}
m.mutex.Unlock()
m.writeCheckpoint()
}
@ -175,7 +170,7 @@ func (m *ManagerImpl) removeContents(dir string) error {
}
stat, err := os.Stat(filePath)
if err != nil {
glog.Errorf("Failed to stat file %v: %v", filePath, err)
klog.Errorf("Failed to stat file %s: %v", filePath, err)
continue
}
if stat.IsDir() {
@ -198,8 +193,7 @@ func (m *ManagerImpl) checkpointFile() string {
// podDevices and allocatedDevices information from checkpoint-ed state and
// starts device plugin registration service.
func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
glog.V(2).Infof("Starting Device Plugin manager")
fmt.Println("Starting Device Plugin manager")
klog.V(2).Infof("Starting Device Plugin manager")
m.activePods = activePods
m.sourcesReady = sourcesReady
@ -207,7 +201,7 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
// Loads in allocatedDevices information from disk.
err := m.readCheckpoint()
if err != nil {
glog.Warningf("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date. Err: %v", err)
klog.Warningf("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date. Err: %v", err)
}
socketPath := filepath.Join(m.socketdir, m.socketname)
@ -216,12 +210,12 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
// Removes all stale sockets in m.socketdir. Device plugins can monitor
// this and use it as a signal to re-register with the new Kubelet.
if err := m.removeContents(m.socketdir); err != nil {
glog.Errorf("Fail to clean up stale contents under %s: %+v", m.socketdir, err)
klog.Errorf("Fail to clean up stale contents under %s: %v", m.socketdir, err)
}
s, err := net.Listen("unix", socketPath)
if err != nil {
glog.Errorf(errListenSocket+" %+v", err)
klog.Errorf(errListenSocket+" %v", err)
return err
}
@ -234,24 +228,87 @@ func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.Sourc
m.server.Serve(s)
}()
glog.V(2).Infof("Serving device plugin registration server on %q", socketPath)
klog.V(2).Infof("Serving device plugin registration server on %q", socketPath)
return nil
}
// Devices is the map of devices that are known by the Device
// Plugin manager with the kind of the devices as key
func (m *ManagerImpl) Devices() map[string][]pluginapi.Device {
// GetWatcherHandler returns the manager itself as the plugin watcher handler.
//
// As a side effect it creates an empty "DEPRECATION" marker file inside the
// device plugin socket directory. Failing to create or close the marker is
// logged (with the underlying error) but is not fatal: the handler is
// returned regardless.
func (m *ManagerImpl) GetWatcherHandler() watcher.PluginHandler {
	// filepath.Join keeps the path correct whether or not socketdir carries
	// a trailing separator.
	deprecationFile := filepath.Join(m.socketdir, "DEPRECATION")

	f, err := os.Create(deprecationFile)
	if err != nil {
		klog.Errorf("Failed to create deprecation file at %s: %v", m.socketdir, err)
		return watcher.PluginHandler(m)
	}
	if err := f.Close(); err != nil {
		klog.Warningf("Failed to close deprecation file %s: %v", f.Name(), err)
	}
	klog.V(4).Infof("created deprecation file %s", f.Name())

	return watcher.PluginHandler(m)
}
// ValidatePlugin accepts a plugin only when at least one of its advertised
// versions is supported by the manager and its name has the format of an
// extended resource. It returns nil on success and a descriptive error
// otherwise. foundInDeprecatedDir is not consulted.
func (m *ManagerImpl) ValidatePlugin(pluginName string, endpoint string, versions []string, foundInDeprecatedDir bool) error {
	klog.V(2).Infof("Got Plugin %s at endpoint %s with versions %v", pluginName, endpoint, versions)

	switch {
	case !m.isVersionCompatibleWithPlugin(versions):
		return fmt.Errorf("manager version, %s, is not among plugin supported versions %v", pluginapi.Version, versions)
	case !v1helper.IsExtendedResourceName(v1.ResourceName(pluginName)):
		return fmt.Errorf("invalid name of device plugin socket: %s", fmt.Sprintf(errInvalidResourceName, pluginName))
	default:
		return nil
	}
}
// RegisterPlugin dials the plugin at endpoint, queries its device plugin
// options, records the endpoint under pluginName and starts running it in a
// background goroutine.
//
// It returns an error when the plugin cannot be dialed or when its options
// cannot be fetched; in the latter case the freshly created endpoint is
// stopped so it is not left dangling.
//
// TODO: Start the endpoint and wait for the First ListAndWatch call
// before registering the plugin
func (m *ManagerImpl) RegisterPlugin(pluginName string, endpoint string, versions []string) error {
	klog.V(2).Infof("Registering Plugin %s at endpoint %s", pluginName, endpoint)

	e, err := newEndpointImpl(endpoint, pluginName, m.callback)
	if err != nil {
		return fmt.Errorf("Failed to dial device plugin with socketPath %s: %v", endpoint, err)
	}

	options, err := e.client.GetDevicePluginOptions(context.Background(), &pluginapi.Empty{})
	if err != nil {
		// The endpoint was dialed successfully but will never be registered
		// or run; stop it here instead of abandoning it.
		e.stop()
		return fmt.Errorf("Failed to get device plugin options: %v", err)
	}

	m.registerEndpoint(pluginName, options, e)
	go m.runEndpoint(pluginName, e)

	return nil
}
// DeRegisterPlugin deregisters the plugin
// TODO work on the behavior for deregistering plugins
// e.g: Should we delete the resource
func (m *ManagerImpl) DeRegisterPlugin(pluginName string) {
m.mutex.Lock()
defer m.mutex.Unlock()
devs := make(map[string][]pluginapi.Device)
for k, e := range m.endpoints {
glog.V(3).Infof("Endpoint: %+v: %p", k, e)
devs[k] = e.getDevices()
// Note: This will mark the resource unhealthy as per the behavior
// in runEndpoint
if eI, ok := m.endpoints[pluginName]; ok {
eI.e.stop()
}
}
return devs
// isVersionCompatibleWithPlugin reports whether any of the versions offered
// by a plugin matches one of pluginapi.SupportedVersions.
func (m *ManagerImpl) isVersionCompatibleWithPlugin(versions []string) bool {
	// TODO(vikasc): Currently this is fine as we only have a single supported version. When we do need to support
	// multiple versions in the future, we may need to extend this function to return a supported version.
	// E.g., say kubelet supports v1beta1 and v1beta2, and we get v1alpha1 and v1beta1 from a device plugin,
	// this function should return v1beta1
	for _, supported := range pluginapi.SupportedVersions {
		for _, offered := range versions {
			if offered != supported {
				continue
			}
			return true
		}
	}
	return false
}
// Allocate is the call that you can use to allocate a set of devices
@ -259,7 +316,6 @@ func (m *ManagerImpl) Devices() map[string][]pluginapi.Device {
func (m *ManagerImpl) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
pod := attrs.Pod
devicesToReuse := make(map[string]sets.String)
// TODO: Reuse devices between init containers and regular containers.
for _, container := range pod.Spec.InitContainers {
if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
return err
@ -287,7 +343,7 @@ func (m *ManagerImpl) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.P
// Register registers a device plugin.
func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) {
glog.Infof("Got registration request from device plugin with resource name %q", r.ResourceName)
klog.Infof("Got registration request from device plugin with resource name %q", r.ResourceName)
metrics.DevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
var versionCompatible bool
for _, v := range pluginapi.SupportedVersions {
@ -298,13 +354,13 @@ func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest
}
if !versionCompatible {
errorString := fmt.Sprintf(errUnsupportedVersion, r.Version, pluginapi.SupportedVersions)
glog.Infof("Bad registration request from device plugin with resource name %q: %v", r.ResourceName, errorString)
klog.Infof("Bad registration request from device plugin with resource name %q: %s", r.ResourceName, errorString)
return &pluginapi.Empty{}, fmt.Errorf(errorString)
}
if !v1helper.IsExtendedResourceName(v1.ResourceName(r.ResourceName)) {
errorString := fmt.Sprintf(errInvalidResourceName, r.ResourceName)
glog.Infof("Bad registration request from device plugin: %v", errorString)
klog.Infof("Bad registration request from device plugin: %s", errorString)
return &pluginapi.Empty{}, fmt.Errorf(errorString)
}
@ -323,8 +379,8 @@ func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest
func (m *ManagerImpl) Stop() error {
m.mutex.Lock()
defer m.mutex.Unlock()
for _, e := range m.endpoints {
e.stop()
for _, eI := range m.endpoints {
eI.e.stop()
}
if m.server == nil {
@ -336,66 +392,42 @@ func (m *ManagerImpl) Stop() error {
return nil
}
// registerEndpoint stores e together with its plugin options as the endpoint
// for resourceName, replacing any previous entry, while holding the manager
// mutex.
func (m *ManagerImpl) registerEndpoint(resourceName string, options *pluginapi.DevicePluginOptions, e endpoint) {
	info := endpointInfo{opts: options, e: e}

	m.mutex.Lock()
	defer m.mutex.Unlock()

	m.endpoints[resourceName] = info
	klog.V(2).Infof("Registered endpoint %v", e)
}
// runEndpoint runs e until its run method returns, then stops it. If e is
// still the endpoint registered for resourceName at that point, the resource
// is marked unhealthy; an endpoint that has since been replaced leaves the
// resource untouched. The final log line is emitted in both cases.
func (m *ManagerImpl) runEndpoint(resourceName string, e endpoint) {
	e.run()
	e.stop()

	m.mutex.Lock()
	defer m.mutex.Unlock()

	current, registered := m.endpoints[resourceName]
	if registered && current.e == e {
		m.markResourceUnhealthy(resourceName)
	}

	klog.V(2).Infof("Endpoint (%s, %v) became unhealthy", resourceName, e)
}
func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
existingDevs := make(map[string]pluginapi.Device)
m.mutex.Lock()
old, ok := m.endpoints[r.ResourceName]
if ok && old != nil {
// Pass devices of previous endpoint into re-registered one,
// to avoid potential orphaned devices upon re-registration
devices := make(map[string]pluginapi.Device)
for _, device := range old.getDevices() {
device.Health = pluginapi.Unhealthy
devices[device.ID] = device
}
existingDevs = devices
}
m.mutex.Unlock()
socketPath := filepath.Join(m.socketdir, r.Endpoint)
e, err := newEndpointImpl(socketPath, r.ResourceName, existingDevs, m.callback)
new, err := newEndpointImpl(filepath.Join(m.socketdir, r.Endpoint), r.ResourceName, m.callback)
if err != nil {
glog.Errorf("Failed to dial device plugin with request %v: %v", r, err)
klog.Errorf("Failed to dial device plugin with request %v: %v", r, err)
return
}
m.mutex.Lock()
if r.Options != nil {
m.pluginOpts[r.ResourceName] = r.Options
}
// Check for potential re-registration during the initialization of new endpoint,
// and skip updating if re-registration happens.
// TODO: simplify the part once we have a better way to handle registered devices
ext := m.endpoints[r.ResourceName]
if ext != old {
glog.Warningf("Some other endpoint %v is added while endpoint %v is initialized", ext, e)
m.mutex.Unlock()
e.stop()
return
}
// Associates the newly created endpoint with the corresponding resource name.
// Stops existing endpoint if there is any.
m.endpoints[r.ResourceName] = e
glog.V(2).Infof("Registered endpoint %v", e)
m.mutex.Unlock()
if old != nil {
old.stop()
}
m.registerEndpoint(r.ResourceName, r.Options, new)
go func() {
e.run()
e.stop()
m.mutex.Lock()
if old, ok := m.endpoints[r.ResourceName]; ok && old == e {
m.markResourceUnhealthy(r.ResourceName)
}
glog.V(2).Infof("Unregistered endpoint %v", e)
m.mutex.Unlock()
m.runEndpoint(r.ResourceName, new)
}()
}
func (m *ManagerImpl) markResourceUnhealthy(resourceName string) {
glog.V(2).Infof("Mark all resources Unhealthy for resource %s", resourceName)
klog.V(2).Infof("Mark all resources Unhealthy for resource %s", resourceName)
healthyDevices := sets.NewString()
if _, ok := m.healthyDevices[resourceName]; ok {
healthyDevices = m.healthyDevices[resourceName]
@ -426,13 +458,13 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
deletedResources := sets.NewString()
m.mutex.Lock()
for resourceName, devices := range m.healthyDevices {
e, ok := m.endpoints[resourceName]
if (ok && e.stopGracePeriodExpired()) || !ok {
eI, ok := m.endpoints[resourceName]
if (ok && eI.e.stopGracePeriodExpired()) || !ok {
// The resources contained in endpoints and (un)healthyDevices
// should always be consistent. Otherwise, we run with the risk
// of failing to garbage collect non-existing resources or devices.
if !ok {
glog.Errorf("unexpected: healthyDevices and endpoints are out of sync")
klog.Errorf("unexpected: healthyDevices and endpoints are out of sync")
}
delete(m.endpoints, resourceName)
delete(m.healthyDevices, resourceName)
@ -444,10 +476,10 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
}
}
for resourceName, devices := range m.unhealthyDevices {
e, ok := m.endpoints[resourceName]
if (ok && e.stopGracePeriodExpired()) || !ok {
eI, ok := m.endpoints[resourceName]
if (ok && eI.e.stopGracePeriodExpired()) || !ok {
if !ok {
glog.Errorf("unexpected: unhealthyDevices and endpoints are out of sync")
klog.Errorf("unexpected: unhealthyDevices and endpoints are out of sync")
}
delete(m.endpoints, resourceName)
delete(m.unhealthyDevices, resourceName)
@ -493,7 +525,7 @@ func (m *ManagerImpl) readCheckpoint() error {
err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
if err != nil {
if err == errors.ErrCheckpointNotFound {
glog.Warningf("Failed to retrieve checkpoint for %q: %v", kubeletDeviceManagerCheckpoint, err)
klog.Warningf("Failed to retrieve checkpoint for %q: %v", kubeletDeviceManagerCheckpoint, err)
return nil
}
return err
@ -508,7 +540,7 @@ func (m *ManagerImpl) readCheckpoint() error {
// will stay zero till the corresponding device plugin re-registers.
m.healthyDevices[resource] = sets.NewString()
m.unhealthyDevices[resource] = sets.NewString()
m.endpoints[resource] = newStoppedEndpointImpl(resource, make(map[string]pluginapi.Device))
m.endpoints[resource] = endpointInfo{e: newStoppedEndpointImpl(resource), opts: nil}
}
return nil
}
@ -530,7 +562,7 @@ func (m *ManagerImpl) updateAllocatedDevices(activePods []*v1.Pod) {
if len(podsToBeRemoved) <= 0 {
return
}
glog.V(3).Infof("pods to be removed: %v", podsToBeRemoved.List())
klog.V(3).Infof("pods to be removed: %v", podsToBeRemoved.List())
m.podDevices.delete(podsToBeRemoved.List())
// Regenerated allocatedDevices after we update pod allocation information.
m.allocatedDevices = m.podDevices.devices()
@ -546,22 +578,22 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
// This can happen if a container restarts for example.
devices := m.podDevices.containerDevices(podUID, contName, resource)
if devices != nil {
glog.V(3).Infof("Found pre-allocated devices for resource %s container %q in Pod %q: %v", resource, contName, podUID, devices.List())
klog.V(3).Infof("Found pre-allocated devices for resource %s container %q in Pod %q: %v", resource, contName, podUID, devices.List())
needed = needed - devices.Len()
// A pod's resource is not expected to change once admitted by the API server,
// so just fail loudly here. We can revisit this part if this no longer holds.
if needed != 0 {
return nil, fmt.Errorf("pod %v container %v changed request for resource %v from %v to %v", podUID, contName, resource, devices.Len(), required)
return nil, fmt.Errorf("pod %q container %q changed request for resource %q from %d to %d", podUID, contName, resource, devices.Len(), required)
}
}
if needed == 0 {
// No change, no work.
return nil, nil
}
glog.V(3).Infof("Needs to allocate %v %v for pod %q container %q", needed, resource, podUID, contName)
klog.V(3).Infof("Needs to allocate %d %q for pod %q container %q", needed, resource, podUID, contName)
// Needs to allocate additional devices.
if _, ok := m.healthyDevices[resource]; !ok {
return nil, fmt.Errorf("can't allocate unregistered device %v", resource)
return nil, fmt.Errorf("can't allocate unregistered device %s", resource)
}
devices = sets.NewString()
// Allocates from reusableDevices list first.
@ -609,7 +641,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
for k, v := range container.Resources.Limits {
resource := string(k)
needed := int(v.Value())
glog.V(3).Infof("needs %d %s", needed, resource)
klog.V(3).Infof("needs %d %s", needed, resource)
if !m.isDevicePluginResource(resource) {
continue
}
@ -641,7 +673,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
// plugin Allocate grpc calls if it becomes common that a container may require
// resources from multiple device plugins.
m.mutex.Lock()
e, ok := m.endpoints[resource]
eI, ok := m.endpoints[resource]
m.mutex.Unlock()
if !ok {
m.mutex.Lock()
@ -653,8 +685,8 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
devs := allocDevices.UnsortedList()
// TODO: refactor this part of code to just append a ContainerAllocationRequest
// in a passed in AllocateRequest pointer, and issues a single Allocate call per pod.
glog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
resp, err := e.allocate(devs)
klog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
resp, err := eI.e.allocate(devs)
metrics.DevicePluginAllocationLatency.WithLabelValues(resource).Observe(metrics.SinceInMicroseconds(startRPCTime))
if err != nil {
// In case of allocation failure, we want to restore m.allocatedDevices
@ -665,6 +697,10 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
return err
}
if len(resp.ContainerResponses) == 0 {
return fmt.Errorf("No containers return in allocation response %v", resp)
}
// Update internal cached podDevices state.
m.mutex.Lock()
m.podDevices.insert(podUID, contName, resource, allocDevices, resp.ContainerResponses[0])
@ -700,16 +736,15 @@ func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
// with PreStartRequired option set.
func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource string) error {
m.mutex.Lock()
opts, ok := m.pluginOpts[resource]
eI, ok := m.endpoints[resource]
if !ok {
m.mutex.Unlock()
glog.V(4).Infof("Plugin options not found in cache for resource: %s. Skip PreStartContainer", resource)
return nil
return fmt.Errorf("endpoint not found in cache for a registered resource: %s", resource)
}
if !opts.PreStartRequired {
if eI.opts == nil || !eI.opts.PreStartRequired {
m.mutex.Unlock()
glog.V(4).Infof("Plugin options indicate to skip PreStartContainer for resource, %v", resource)
klog.V(4).Infof("Plugin options indicate to skip PreStartContainer for resource: %s", resource)
return nil
}
@ -719,16 +754,10 @@ func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource s
return fmt.Errorf("no devices found allocated in local cache for pod %s, container %s, resource %s", podUID, contName, resource)
}
e, ok := m.endpoints[resource]
if !ok {
m.mutex.Unlock()
return fmt.Errorf("endpoint not found in cache for a registered resource: %s", resource)
}
m.mutex.Unlock()
devs := devices.UnsortedList()
glog.V(4).Infof("Issuing an PreStartContainer call for container, %s, of pod %s", contName, podUID)
_, err := e.preStartContainer(devs)
klog.V(4).Infof("Issuing an PreStartContainer call for container, %s, of pod %s", contName, podUID)
_, err := eI.e.preStartContainer(devs)
if err != nil {
return fmt.Errorf("device plugin PreStartContainer rpc failed with err: %v", err)
}
@ -774,3 +803,10 @@ func (m *ManagerImpl) isDevicePluginResource(resource string) bool {
}
return false
}
// GetDevices returns the devices used by the specified container.
// The lookup is delegated to podDevices.getContainerDevices and runs with
// m.mutex held, so it is serialized against other users of that mutex.
func (m *ManagerImpl) GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices {
m.mutex.Lock()
// Deferred so the mutex is released on every exit path of the lookup.
defer m.mutex.Unlock()
return m.podDevices.getContainerDevices(podUID, containerName)
}

View File

@ -18,9 +18,10 @@ package devicemanager
import (
"k8s.io/api/core/v1"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -42,11 +43,6 @@ func (h *ManagerStub) Stop() error {
return nil
}
// Devices returns an empty map.
func (h *ManagerStub) Devices() map[string][]pluginapi.Device {
return make(map[string][]pluginapi.Device)
}
// Allocate simply returns nil.
func (h *ManagerStub) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
return nil
@ -61,3 +57,13 @@ func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Co
func (h *ManagerStub) GetCapacity() (v1.ResourceList, v1.ResourceList, []string) {
	// The stub reports nothing: both resource lists stay at their nil zero
	// value, while the third result is an empty, non-nil slice.
	var capacity, allocatable v1.ResourceList
	removed := []string{}
	return capacity, allocatable, removed
}
// GetWatcherHandler returns the plugin watcher handler of the manager.
// The stub provides no handler and always returns nil.
func (h *ManagerStub) GetWatcherHandler() pluginwatcher.PluginHandler {
return nil
}
// GetDevices returns nil for every pod UID / container name pair; both
// arguments are ignored by the stub.
func (h *ManagerStub) GetDevices(_, _ string) []*podresourcesapi.ContainerDevices {
return nil
}

View File

@ -20,8 +20,8 @@ import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"sync/atomic"
"testing"
"time"
@ -33,8 +33,10 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
watcherapi "k8s.io/kubernetes/pkg/kubelet/apis/pluginregistration/v1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -65,31 +67,18 @@ func TestNewManagerImplStart(t *testing.T) {
socketDir, socketName, pluginSocketName, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {}, socketName, pluginSocketName)
cleanup(t, m, p)
m, _, p := setup(t, []*pluginapi.Device{}, func(n string, d []pluginapi.Device) {}, socketName, pluginSocketName)
cleanup(t, m, p, nil)
// Stop should tolerate being called more than once.
cleanup(t, m, p)
cleanup(t, m, p, nil)
}
func TestNewManagerImplStop(t *testing.T) {
func TestNewManagerImplStartProbeMode(t *testing.T) {
socketDir, socketName, pluginSocketName, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
m, err := newManagerImpl(socketName)
require.NoError(t, err)
// No prior Start, but that should be okay.
err = m.Stop()
require.NoError(t, err)
devs := []*pluginapi.Device{
{ID: "Dev1", Health: pluginapi.Healthy},
{ID: "Dev2", Health: pluginapi.Healthy},
}
p := NewDevicePluginStub(devs, pluginSocketName)
// Same here.
err = p.Stop()
require.NoError(t, err)
m, _, p, w := setupInProbeMode(t, []*pluginapi.Device{}, func(n string, d []pluginapi.Device) {}, socketName, pluginSocketName)
cleanup(t, m, p, w)
}
// Tests that the device plugin manager correctly handles registration and re-registration by
@ -107,91 +96,184 @@ func TestDevicePluginReRegistration(t *testing.T) {
{ID: "Dev3", Health: pluginapi.Healthy},
}
for _, preStartContainerFlag := range []bool{false, true} {
expCallbackCount := int32(0)
callbackCount := int32(0)
callbackChan := make(chan int32)
callback := func(n string, a, u, r []pluginapi.Device) {
callbackCount++
if callbackCount > atomic.LoadInt32(&expCallbackCount) {
t.FailNow()
}
callbackChan <- callbackCount
}
m, p1 := setup(t, devs, callback, socketName, pluginSocketName)
atomic.StoreInt32(&expCallbackCount, 1)
p1.Register(socketName, testResourceName, preStartContainerFlag)
// Wait for the first callback to be issued.
m, ch, p1 := setup(t, devs, nil, socketName, pluginSocketName)
p1.Register(socketName, testResourceName, "")
select {
case <-callbackChan:
break
case <-time.After(time.Second):
t.FailNow()
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("timeout while waiting for manager update")
}
devices := m.Devices()
require.Equal(t, 2, len(devices[testResourceName]), "Devices are not updated.")
capacity, allocatable, _ := m.GetCapacity()
resourceCapacity, _ := capacity[v1.ResourceName(testResourceName)]
resourceAllocatable, _ := allocatable[v1.ResourceName(testResourceName)]
require.Equal(t, resourceCapacity.Value(), resourceAllocatable.Value(), "capacity should equal to allocatable")
require.Equal(t, int64(2), resourceAllocatable.Value(), "Devices are not updated.")
p2 := NewDevicePluginStub(devs, pluginSocketName+".new")
p2 := NewDevicePluginStub(devs, pluginSocketName+".new", testResourceName, preStartContainerFlag)
err = p2.Start()
require.NoError(t, err)
atomic.StoreInt32(&expCallbackCount, 2)
p2.Register(socketName, testResourceName, preStartContainerFlag)
// Wait for the second callback to be issued.
select {
case <-callbackChan:
break
case <-time.After(time.Second):
t.FailNow()
}
p2.Register(socketName, testResourceName, "")
devices2 := m.Devices()
require.Equal(t, 2, len(devices2[testResourceName]), "Devices shouldn't change.")
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("timeout while waiting for manager update")
}
capacity, allocatable, _ = m.GetCapacity()
resourceCapacity, _ = capacity[v1.ResourceName(testResourceName)]
resourceAllocatable, _ = allocatable[v1.ResourceName(testResourceName)]
require.Equal(t, resourceCapacity.Value(), resourceAllocatable.Value(), "capacity should equal to allocatable")
require.Equal(t, int64(2), resourceAllocatable.Value(), "Devices shouldn't change.")
// Test the scenario that a plugin re-registers with different devices.
p3 := NewDevicePluginStub(devsForRegistration, pluginSocketName+".third")
p3 := NewDevicePluginStub(devsForRegistration, pluginSocketName+".third", testResourceName, preStartContainerFlag)
err = p3.Start()
require.NoError(t, err)
atomic.StoreInt32(&expCallbackCount, 3)
p3.Register(socketName, testResourceName, preStartContainerFlag)
// Wait for the second callback to be issued.
p3.Register(socketName, testResourceName, "")
select {
case <-callbackChan:
break
case <-time.After(time.Second):
t.FailNow()
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("timeout while waiting for manager update")
}
devices3 := m.Devices()
require.Equal(t, 1, len(devices3[testResourceName]), "Devices of plugin previously registered should be removed.")
capacity, allocatable, _ = m.GetCapacity()
resourceCapacity, _ = capacity[v1.ResourceName(testResourceName)]
resourceAllocatable, _ = allocatable[v1.ResourceName(testResourceName)]
require.Equal(t, resourceCapacity.Value(), resourceAllocatable.Value(), "capacity should equal to allocatable")
require.Equal(t, int64(1), resourceAllocatable.Value(), "Devices of plugin previously registered should be removed.")
p2.Stop()
p3.Stop()
cleanup(t, m, p1)
close(callbackChan)
cleanup(t, m, p1, nil)
}
}
func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback, socketName string, pluginSocketName string) (Manager, *Stub) {
// TestDevicePluginReRegistrationProbeMode tests that the device plugin manager
// correctly handles registration and re-registration: after registration the
// devices are correctly updated, a re-registration with the same devices does
// NOT delete them, and no orphaned devices are left once a plugin re-registers
// with a different device list.
// Plugin discovery and registration are done through the kubelet probe based
// mechanism (the plugin watcher started by setupInProbeMode) rather than by
// calling Register on the plugin stub.
func TestDevicePluginReRegistrationProbeMode(t *testing.T) {
	socketDir, socketName, pluginSocketName, err := tmpSocketDir()
	require.NoError(t, err)
	defer os.RemoveAll(socketDir)

	devs := []*pluginapi.Device{
		{ID: "Dev1", Health: pluginapi.Healthy},
		{ID: "Dev2", Health: pluginapi.Healthy},
	}
	devsForRegistration := []*pluginapi.Device{
		{ID: "Dev3", Health: pluginapi.Healthy},
	}

	m, ch, p1, w := setupInProbeMode(t, devs, nil, socketName, pluginSocketName)

	// waitForUpdate blocks until the manager signals a device update on ch.
	// It fails the test with a message naming the stage on timeout, so a hang
	// is diagnosable from the test output (a bare FailNow prints nothing).
	waitForUpdate := func(stage string) {
		select {
		case <-ch:
		case <-time.After(5 * time.Second):
			t.Fatalf("timeout while waiting for manager update after %s", stage)
		}
	}

	// requireAllocatable asserts that capacity and allocatable reported for
	// testResourceName agree and that allocatable equals expected.
	requireAllocatable := func(expected int64, msg string) {
		capacity, allocatable, _ := m.GetCapacity()
		resourceCapacity := capacity[v1.ResourceName(testResourceName)]
		resourceAllocatable := allocatable[v1.ResourceName(testResourceName)]
		require.Equal(t, resourceCapacity.Value(), resourceAllocatable.Value(), "capacity should equal to allocatable")
		require.Equal(t, expected, resourceAllocatable.Value(), msg)
	}

	// Wait for the first callback to be issued.
	waitForUpdate("initial plugin registration")
	requireAllocatable(2, "Devices are not updated.")

	// A second plugin serving the same devices from a new socket must not
	// change the reported device count.
	p2 := NewDevicePluginStub(devs, pluginSocketName+".new", testResourceName, false)
	err = p2.Start()
	require.NoError(t, err)

	// Wait for the second callback to be issued.
	waitForUpdate("re-registration with the same devices")
	requireAllocatable(2, "Devices shouldn't change.")

	// Test the scenario that a plugin re-registers with different devices.
	p3 := NewDevicePluginStub(devsForRegistration, pluginSocketName+".third", testResourceName, false)
	err = p3.Start()
	require.NoError(t, err)

	// Wait for the third callback to be issued.
	waitForUpdate("re-registration with different devices")
	requireAllocatable(1, "Devices of previous registered should be removed")

	p2.Stop()
	p3.Stop()
	cleanup(t, m, p1, w)
}
func setupDeviceManager(t *testing.T, devs []*pluginapi.Device, callback monitorCallback, socketName string) (Manager, <-chan interface{}) {
m, err := newManagerImpl(socketName)
require.NoError(t, err)
updateChan := make(chan interface{})
m.callback = callback
if callback != nil {
m.callback = callback
}
originalCallback := m.callback
m.callback = func(resourceName string, devices []pluginapi.Device) {
originalCallback(resourceName, devices)
updateChan <- new(interface{})
}
activePods := func() []*v1.Pod {
return []*v1.Pod{}
}
err = m.Start(activePods, &sourcesReadyStub{})
require.NoError(t, err)
p := NewDevicePluginStub(devs, pluginSocketName)
err = p.Start()
require.NoError(t, err)
return m, p
return m, updateChan
}
func cleanup(t *testing.T, m Manager, p *Stub) {
// setupDevicePlugin creates a device plugin stub for testResourceName that
// serves devs on pluginSocketName (constructed with a false final argument)
// and starts it. The test fails immediately if the stub cannot be started.
func setupDevicePlugin(t *testing.T, devs []*pluginapi.Device, pluginSocketName string) *Stub {
	stub := NewDevicePluginStub(devs, pluginSocketName, testResourceName, false)
	require.NoError(t, stub.Start())
	return stub
}
// setupPluginWatcher creates a plugin watcher on the directory containing
// pluginSocketName, registers the manager's watcher handler for device
// plugins with it, starts it and returns it.
func setupPluginWatcher(pluginSocketName string, m Manager) *pluginwatcher.Watcher {
	socketDir := filepath.Dir(pluginSocketName)
	watcher := pluginwatcher.NewWatcher(socketDir, "" /* deprecatedSockDir */)
	watcher.AddHandler(watcherapi.DevicePlugin, m.GetWatcherHandler())
	watcher.Start()
	return watcher
}
// setup starts a device manager on socketName and a device plugin stub on
// pluginSocketName. It returns the manager, the channel that is signalled on
// every device update callback, and the plugin stub. No plugin watcher is
// involved; the caller is expected to register the plugin explicitly.
func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback, socketName string, pluginSocketName string) (Manager, <-chan interface{}, *Stub) {
	manager, updates := setupDeviceManager(t, devs, callback, socketName)
	plugin := setupDevicePlugin(t, devs, pluginSocketName)
	return manager, updates, plugin
}
// setupInProbeMode starts a device manager, a plugin watcher wired to that
// manager, and finally a device plugin stub. It returns all three together
// with the channel that is signalled on every device update callback.
func setupInProbeMode(t *testing.T, devs []*pluginapi.Device, callback monitorCallback, socketName string, pluginSocketName string) (Manager, <-chan interface{}, *Stub, *pluginwatcher.Watcher) {
	manager, updates := setupDeviceManager(t, devs, callback, socketName)
	// The watcher is started before the plugin — presumably so that it
	// observes the plugin socket being created; keep this order.
	watcher := setupPluginWatcher(pluginSocketName, manager)
	plugin := setupDevicePlugin(t, devs, pluginSocketName)
	return manager, updates, plugin, watcher
}
// cleanup stops the plugin stub and the manager, in that order, and then the
// plugin watcher when one was started. Only the watcher's Stop error is
// checked; a nil w means the test did not run in probe mode.
func cleanup(t *testing.T, m Manager, p *Stub, w *pluginwatcher.Watcher) {
	p.Stop()
	m.Stop()
	if w == nil {
		return
	}
	require.NoError(t, w.Stop())
}
func TestUpdateCapacityAllocatable(t *testing.T) {
@ -213,9 +295,9 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
// Adds three devices for resource1, two healthy and one unhealthy.
// Expects capacity for resource1 to be 2.
resourceName1 := "domain1.com/resource1"
e1 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
testManager.endpoints[resourceName1] = e1
callback(resourceName1, devs, []pluginapi.Device{}, []pluginapi.Device{})
e1 := &endpointImpl{}
testManager.endpoints[resourceName1] = endpointInfo{e: e1, opts: nil}
callback(resourceName1, devs)
capacity, allocatable, removedResources := testManager.GetCapacity()
resource1Capacity, ok := capacity[v1.ResourceName(resourceName1)]
as.True(ok)
@ -226,7 +308,8 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
as.Equal(0, len(removedResources))
// Deletes an unhealthy device should NOT change allocatable but change capacity.
callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[2]})
devs1 := devs[:len(devs)-1]
callback(resourceName1, devs1)
capacity, allocatable, removedResources = testManager.GetCapacity()
resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
as.True(ok)
@ -237,34 +320,34 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
as.Equal(0, len(removedResources))
// Updates a healthy device to unhealthy should reduce allocatable by 1.
dev2 := devs[1]
dev2.Health = pluginapi.Unhealthy
callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{dev2}, []pluginapi.Device{})
devs[1].Health = pluginapi.Unhealthy
callback(resourceName1, devs)
capacity, allocatable, removedResources = testManager.GetCapacity()
resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
as.True(ok)
resource1Allocatable, ok = allocatable[v1.ResourceName(resourceName1)]
as.True(ok)
as.Equal(int64(2), resource1Capacity.Value())
as.Equal(int64(3), resource1Capacity.Value())
as.Equal(int64(1), resource1Allocatable.Value())
as.Equal(0, len(removedResources))
// Deletes a healthy device should reduce capacity and allocatable by 1.
callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[0]})
devs2 := devs[1:]
callback(resourceName1, devs2)
capacity, allocatable, removedResources = testManager.GetCapacity()
resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
as.True(ok)
resource1Allocatable, ok = allocatable[v1.ResourceName(resourceName1)]
as.True(ok)
as.Equal(int64(0), resource1Allocatable.Value())
as.Equal(int64(1), resource1Capacity.Value())
as.Equal(int64(2), resource1Capacity.Value())
as.Equal(0, len(removedResources))
// Tests adding another resource.
resourceName2 := "resource2"
e2 := &endpointImpl{devices: make(map[string]pluginapi.Device)}
testManager.endpoints[resourceName2] = e2
callback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
e2 := &endpointImpl{}
testManager.endpoints[resourceName2] = endpointInfo{e: e2, opts: nil}
callback(resourceName2, devs)
capacity, allocatable, removedResources = testManager.GetCapacity()
as.Equal(2, len(capacity))
resource2Capacity, ok := capacity[v1.ResourceName(resourceName2)]
@ -272,7 +355,7 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
resource2Allocatable, ok := allocatable[v1.ResourceName(resourceName2)]
as.True(ok)
as.Equal(int64(3), resource2Capacity.Value())
as.Equal(int64(2), resource2Allocatable.Value())
as.Equal(int64(1), resource2Allocatable.Value())
as.Equal(0, len(removedResources))
// Expires resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
@ -374,7 +457,7 @@ func TestCheckpoint(t *testing.T) {
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
as.Nil(err)
testManager := &ManagerImpl{
endpoints: make(map[string]endpoint),
endpoints: make(map[string]endpointInfo),
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
@ -460,11 +543,7 @@ type MockEndpoint struct {
// stop is a no-op on the mock endpoint.
func (m *MockEndpoint) stop() {}
// run is a no-op on the mock endpoint.
func (m *MockEndpoint) run() {}
func (m *MockEndpoint) getDevices() []pluginapi.Device {
return []pluginapi.Device{}
}
func (m *MockEndpoint) callback(resourceName string, added, updated, deleted []pluginapi.Device) {}
// callback ignores the reported device list; the mock endpoint keeps no device state.
func (m *MockEndpoint) callback(resourceName string, devices []pluginapi.Device) {}
func (m *MockEndpoint) preStartContainer(devs []string) (*pluginapi.PreStartContainerResponse, error) {
m.initChan <- devs
@ -499,8 +578,8 @@ func makePod(limits v1.ResourceList) *v1.Pod {
}
}
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) (*ManagerImpl, error) {
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource) (*ManagerImpl, error) {
monitorCallback := func(resourceName string, devices []pluginapi.Device) {}
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
if err != nil {
return nil, err
@ -511,41 +590,45 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
endpoints: make(map[string]endpoint),
pluginOpts: opts,
endpoints: make(map[string]endpointInfo),
podDevices: make(podDevices),
activePods: activePods,
sourcesReady: &sourcesReadyStub{},
checkpointManager: ckm,
}
for _, res := range testRes {
testManager.healthyDevices[res.resourceName] = sets.NewString()
for _, dev := range res.devs {
testManager.healthyDevices[res.resourceName].Insert(dev)
}
if res.resourceName == "domain1.com/resource1" {
testManager.endpoints[res.resourceName] = &MockEndpoint{
allocateFunc: allocateStubFunc(),
testManager.endpoints[res.resourceName] = endpointInfo{
e: &MockEndpoint{allocateFunc: allocateStubFunc()},
opts: nil,
}
}
if res.resourceName == "domain2.com/resource2" {
testManager.endpoints[res.resourceName] = &MockEndpoint{
allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
resp := new(pluginapi.ContainerAllocateResponse)
resp.Envs = make(map[string]string)
for _, dev := range devs {
switch dev {
case "dev3":
resp.Envs["key2"] = "val2"
testManager.endpoints[res.resourceName] = endpointInfo{
e: &MockEndpoint{
allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
resp := new(pluginapi.ContainerAllocateResponse)
resp.Envs = make(map[string]string)
for _, dev := range devs {
switch dev {
case "dev3":
resp.Envs["key2"] = "val2"
case "dev4":
resp.Envs["key2"] = "val3"
case "dev4":
resp.Envs["key2"] = "val3"
}
}
}
resps := new(pluginapi.AllocateResponse)
resps.ContainerResponses = append(resps.ContainerResponses, resp)
return resps, nil
resps := new(pluginapi.AllocateResponse)
resps.ContainerResponses = append(resps.ContainerResponses, resp)
return resps, nil
},
},
opts: nil,
}
}
}
@ -591,8 +674,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
as.Nil(err)
defer os.RemoveAll(tmpDir)
nodeInfo := getTestNodeInfo(v1.ResourceList{})
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
as.Nil(err)
testPods := []*v1.Pod{
@ -619,7 +701,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
expectedContainerOptsLen: []int{3, 2, 2},
expectedAllocatedResName1: 2,
expectedAllocatedResName2: 1,
expErr: nil,
expErr: nil,
},
{
description: "Requesting to create a pod without enough resources should fail",
@ -627,7 +709,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
expectedContainerOptsLen: nil,
expectedAllocatedResName1: 2,
expectedAllocatedResName2: 1,
expErr: fmt.Errorf("requested number of devices unavailable for domain1.com/resource1. Requested: 1, Available: 0"),
expErr: fmt.Errorf("requested number of devices unavailable for domain1.com/resource1. Requested: 1, Available: 0"),
},
{
description: "Successful allocation of all available Res1 resources and Res2 resources",
@ -635,7 +717,7 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
expectedContainerOptsLen: []int{0, 0, 1},
expectedAllocatedResName1: 2,
expectedAllocatedResName2: 2,
expErr: nil,
expErr: nil,
},
}
activePods := []*v1.Pod{}
@ -687,8 +769,8 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "checkpoint")
as.Nil(err)
defer os.RemoveAll(tmpDir)
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
as.Nil(err)
podWithPluginResourcesInInitContainers := &v1.Pod{
@ -766,7 +848,7 @@ func TestSanitizeNodeAllocatable(t *testing.T) {
devID2 := "dev2"
as := assert.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
monitorCallback := func(resourceName string, devices []pluginapi.Device) {}
tmpDir, err := ioutil.TempDir("", "checkpoint")
as.Nil(err)
@ -822,18 +904,18 @@ func TestDevicePreStartContainer(t *testing.T) {
as.Nil(err)
defer os.RemoveAll(tmpDir)
nodeInfo := getTestNodeInfo(v1.ResourceList{})
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
pluginOpts[res1.resourceName] = &pluginapi.DevicePluginOptions{PreStartRequired: true}
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1})
as.Nil(err)
ch := make(chan []string, 1)
testManager.endpoints[res1.resourceName] = &MockEndpoint{
initChan: ch,
allocateFunc: allocateStubFunc(),
testManager.endpoints[res1.resourceName] = endpointInfo{
e: &MockEndpoint{
initChan: ch,
allocateFunc: allocateStubFunc(),
},
opts: &pluginapi.DevicePluginOptions{PreStartRequired: true},
}
pod := makePod(v1.ResourceList{
v1.ResourceName(res1.resourceName): res1.resourceQuantity})
activePods := []*v1.Pod{}

View File

@ -17,10 +17,11 @@ limitations under the License.
package devicemanager
import (
"github.com/golang/glog"
"k8s.io/klog"
"k8s.io/apimachinery/pkg/util/sets"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@ -135,13 +136,13 @@ func (pdev podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
for resource, devices := range resources {
devIds := devices.deviceIds.UnsortedList()
if devices.allocResp == nil {
glog.Errorf("Can't marshal allocResp for %v %v %v: allocation response is missing", podUID, conName, resource)
klog.Errorf("Can't marshal allocResp for %v %v %v: allocation response is missing", podUID, conName, resource)
continue
}
allocResp, err := devices.allocResp.Marshal()
if err != nil {
glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
klog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
continue
}
data = append(data, checkpoint.PodDevicesEntry{
@ -159,7 +160,7 @@ func (pdev podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
// Populates podDevices from the passed in checkpointData.
func (pdev podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
for _, entry := range data {
glog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
klog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)
devIDs := sets.NewString()
for _, devID := range entry.DeviceIDs {
@ -168,7 +169,7 @@ func (pdev podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
allocResp := &pluginapi.ContainerAllocateResponse{}
err := allocResp.Unmarshal(entry.AllocResp)
if err != nil {
glog.Errorf("Can't unmarshal allocResp for %v %v %v: %v", entry.PodUID, entry.ContainerName, entry.ResourceName, err)
klog.Errorf("Can't unmarshal allocResp for %v %v %v: %v", entry.PodUID, entry.ContainerName, entry.ResourceName, err)
continue
}
pdev.insert(entry.PodUID, entry.ContainerName, entry.ResourceName, devIDs, allocResp)
@ -203,13 +204,13 @@ func (pdev podDevices) deviceRunContainerOptions(podUID, contName string) *Devic
// Updates RunContainerOptions.Envs.
for k, v := range resp.Envs {
if e, ok := envsMap[k]; ok {
glog.V(4).Infof("Skip existing env %s %s", k, v)
klog.V(4).Infof("Skip existing env %s %s", k, v)
if e != v {
glog.Errorf("Environment variable %s has conflicting setting: %s and %s", k, e, v)
klog.Errorf("Environment variable %s has conflicting setting: %s and %s", k, e, v)
}
continue
}
glog.V(4).Infof("Add env %s %s", k, v)
klog.V(4).Infof("Add env %s %s", k, v)
envsMap[k] = v
opts.Envs = append(opts.Envs, kubecontainer.EnvVar{Name: k, Value: v})
}
@ -217,14 +218,14 @@ func (pdev podDevices) deviceRunContainerOptions(podUID, contName string) *Devic
// Updates RunContainerOptions.Devices.
for _, dev := range resp.Devices {
if d, ok := devsMap[dev.ContainerPath]; ok {
glog.V(4).Infof("Skip existing device %s %s", dev.ContainerPath, dev.HostPath)
klog.V(4).Infof("Skip existing device %s %s", dev.ContainerPath, dev.HostPath)
if d != dev.HostPath {
glog.Errorf("Container device %s has conflicting mapping host devices: %s and %s",
klog.Errorf("Container device %s has conflicting mapping host devices: %s and %s",
dev.ContainerPath, d, dev.HostPath)
}
continue
}
glog.V(4).Infof("Add device %s %s", dev.ContainerPath, dev.HostPath)
klog.V(4).Infof("Add device %s %s", dev.ContainerPath, dev.HostPath)
devsMap[dev.ContainerPath] = dev.HostPath
opts.Devices = append(opts.Devices, kubecontainer.DeviceInfo{
PathOnHost: dev.HostPath,
@ -236,14 +237,14 @@ func (pdev podDevices) deviceRunContainerOptions(podUID, contName string) *Devic
// Updates RunContainerOptions.Mounts.
for _, mount := range resp.Mounts {
if m, ok := mountsMap[mount.ContainerPath]; ok {
glog.V(4).Infof("Skip existing mount %s %s", mount.ContainerPath, mount.HostPath)
klog.V(4).Infof("Skip existing mount %s %s", mount.ContainerPath, mount.HostPath)
if m != mount.HostPath {
glog.Errorf("Container mount %s has conflicting mapping host mounts: %s and %s",
klog.Errorf("Container mount %s has conflicting mapping host mounts: %s and %s",
mount.ContainerPath, m, mount.HostPath)
}
continue
}
glog.V(4).Infof("Add mount %s %s", mount.ContainerPath, mount.HostPath)
klog.V(4).Infof("Add mount %s %s", mount.ContainerPath, mount.HostPath)
mountsMap[mount.ContainerPath] = mount.HostPath
opts.Mounts = append(opts.Mounts, kubecontainer.Mount{
Name: mount.ContainerPath,
@ -258,16 +259,34 @@ func (pdev podDevices) deviceRunContainerOptions(podUID, contName string) *Devic
// Updates for Annotations
for k, v := range resp.Annotations {
if e, ok := annotationsMap[k]; ok {
glog.V(4).Infof("Skip existing annotation %s %s", k, v)
klog.V(4).Infof("Skip existing annotation %s %s", k, v)
if e != v {
glog.Errorf("Annotation %s has conflicting setting: %s and %s", k, e, v)
klog.Errorf("Annotation %s has conflicting setting: %s and %s", k, e, v)
}
continue
}
glog.V(4).Infof("Add annotation %s %s", k, v)
klog.V(4).Infof("Add annotation %s %s", k, v)
annotationsMap[k] = v
opts.Annotations = append(opts.Annotations, kubecontainer.Annotation{Name: k, Value: v})
}
}
return opts
}
// getContainerDevices lists, per resource name, the device IDs recorded for
// the given container of the given pod. It returns nil when either the pod or
// the container has no entry in pdev, and an empty non-nil slice when the
// container has an entry but no resources. The order of the returned entries
// follows map iteration and the IDs come from UnsortedList, so neither
// ordering should be relied upon.
func (pdev podDevices) getContainerDevices(podUID, contName string) []*podresourcesapi.ContainerDevices {
	containers, ok := pdev[podUID]
	if !ok {
		return nil
	}
	resources, ok := containers[contName]
	if !ok {
		return nil
	}
	devices := make([]*podresourcesapi.ContainerDevices, 0, len(resources))
	for name, info := range resources {
		entry := &podresourcesapi.ContainerDevices{
			ResourceName: name,
			DeviceIds:    info.deviceIds.UnsortedList(),
		}
		devices = append(devices, entry)
	}
	return devices
}

View File

@ -20,10 +20,11 @@ import (
"time"
"k8s.io/api/core/v1"
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
watcher "k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -32,11 +33,6 @@ type Manager interface {
// Start starts device plugin registration service.
Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error
// Devices is the map of devices that have registered themselves
// against the manager.
// The map key is the ResourceName of the device plugins.
Devices() map[string][]pluginapi.Device
// Allocate configures and assigns devices to pods. The pods are provided
// through the pod admission attributes in the attrs argument. From the
// requested device resources, Allocate will communicate with the owning
@ -58,6 +54,10 @@ type Manager interface {
// GetCapacity returns the amount of available device plugin resource capacity, resource allocatable
// and inactive device plugin resources previously registered on the node.
GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
GetWatcherHandler() watcher.PluginHandler
// GetDevices returns information about the devices assigned to pods and containers
GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices
}
// DeviceRunContainerOptions contains the combined container runtime settings to consume its allocated devices.
@ -79,22 +79,14 @@ const (
errFailedToDialDevicePlugin = "failed to dial device plugin:"
// errUnsupportedVersion is the error raised when the device plugin uses an API version not
// supported by the Kubelet registry
errUnsupportedVersion = "requested API version %q is not supported by kubelet. Supported versions are %q"
// errDevicePluginAlreadyExists is the error raised when a device plugin with the
// same Resource Name tries to register itself
errDevicePluginAlreadyExists = "another device plugin already registered this Resource Name"
errUnsupportedVersion = "requested API version %q is not supported by kubelet. Supported version is %q"
// errInvalidResourceName is the error raised when a device plugin is registering
// itself with an invalid ResourceName
errInvalidResourceName = "the ResourceName %q is invalid"
// errEmptyResourceName is the error raised when the resource name field is empty
errEmptyResourceName = "invalid Empty ResourceName"
// errEndpointStopped indicates that the endpoint has been stopped
errEndpointStopped = "endpoint %v has been stopped"
// errBadSocket is the error raised when the registry socket path is not absolute
errBadSocket = "bad socketPath, must be an absolute path:"
// errRemoveSocket is the error raised when the registry could not remove the existing socket
errRemoveSocket = "failed to remove socket while starting device plugin registry, with error"
// errListenSocket is the error raised when the registry could not listen on the socket
errListenSocket = "failed to listen to socket while starting device plugin registry, with error"
// errListAndWatch is the error raised when ListAndWatch ended unsuccessfully