Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-14 18:53:35 +00:00)

Commit: vendor files
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/BUILD (generated, vendored, new file, 70 lines)
@@ -0,0 +1,70 @@
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
    "go_test",
)

go_library(
    name = "go_default_library",
    srcs = [
        "device_plugin_stub.go",
        "endpoint.go",
        "manager.go",
        "manager_stub.go",
        "pod_devices.go",
        "types.go",
    ],
    importpath = "k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin",
    deps = [
        "//pkg/apis/core/v1/helper:go_default_library",
        "//pkg/kubelet/apis/deviceplugin/v1alpha:go_default_library",
        "//pkg/kubelet/config:go_default_library",
        "//pkg/kubelet/container:go_default_library",
        "//pkg/kubelet/lifecycle:go_default_library",
        "//pkg/kubelet/metrics:go_default_library",
        "//plugin/pkg/scheduler/schedulercache:go_default_library",
        "//vendor/github.com/golang/glog:go_default_library",
        "//vendor/golang.org/x/net/context:go_default_library",
        "//vendor/google.golang.org/grpc:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
    ],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
)

go_test(
    name = "go_default_test",
    srcs = [
        "endpoint_test.go",
        "manager_test.go",
    ],
    importpath = "k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin",
    library = ":go_default_library",
    deps = [
        "//pkg/kubelet/apis/deviceplugin/v1alpha:go_default_library",
        "//pkg/kubelet/lifecycle:go_default_library",
        "//plugin/pkg/scheduler/schedulercache:go_default_library",
        "//vendor/github.com/stretchr/testify/assert:go_default_library",
        "//vendor/github.com/stretchr/testify/require:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
    ],
)
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/OWNERS (generated, vendored, new file, 6 lines)
@@ -0,0 +1,6 @@
approvers:
- jiayingz
- vishh
reviewers:
- mindprince
- RenaudWasTaken
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/device_plugin_stub.go (generated, vendored, new file, 158 lines)
@@ -0,0 +1,158 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"log"
	"net"
	"os"
	"path"
	"time"

	"golang.org/x/net/context"
	"google.golang.org/grpc"

	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
)

// Stub implementation for DevicePlugin.
type Stub struct {
	devs   []*pluginapi.Device
	socket string

	stop   chan interface{}
	update chan []*pluginapi.Device

	server *grpc.Server
}

// NewDevicePluginStub returns an initialized DevicePlugin Stub.
func NewDevicePluginStub(devs []*pluginapi.Device, socket string) *Stub {
	return &Stub{
		devs:   devs,
		socket: socket,

		stop:   make(chan interface{}),
		update: make(chan []*pluginapi.Device),
	}
}

// Start starts the gRPC server of the device plugin
func (m *Stub) Start() error {
	err := m.cleanup()
	if err != nil {
		return err
	}

	sock, err := net.Listen("unix", m.socket)
	if err != nil {
		return err
	}

	m.server = grpc.NewServer([]grpc.ServerOption{}...)
	pluginapi.RegisterDevicePluginServer(m.server, m)

	go m.server.Serve(sock)
	// Wait till grpc server is ready.
	for i := 0; i < 10; i++ {
		services := m.server.GetServiceInfo()
		if len(services) > 1 {
			break
		}
		time.Sleep(1 * time.Second)
	}
	log.Println("Starting to serve on", m.socket)

	return nil
}

// Stop stops the gRPC server
func (m *Stub) Stop() error {
	m.server.Stop()
	close(m.stop)

	return m.cleanup()
}

// Register registers the device plugin for the given resourceName with Kubelet.
func (m *Stub) Register(kubeletEndpoint, resourceName string) error {
	conn, err := grpc.Dial(kubeletEndpoint, grpc.WithInsecure(),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}))
	defer conn.Close()
	if err != nil {
		return err
	}
	client := pluginapi.NewRegistrationClient(conn)
	reqt := &pluginapi.RegisterRequest{
		Version:      pluginapi.Version,
		Endpoint:     path.Base(m.socket),
		ResourceName: resourceName,
	}

	_, err = client.Register(context.Background(), reqt)
	if err != nil {
		return err
	}
	return nil
}

// ListAndWatch lists devices and update that list according to the Update call
func (m *Stub) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
	log.Println("ListAndWatch")
	var devs []*pluginapi.Device

	for _, d := range m.devs {
		devs = append(devs, &pluginapi.Device{
			ID:     d.ID,
			Health: pluginapi.Healthy,
		})
	}

	s.Send(&pluginapi.ListAndWatchResponse{Devices: devs})

	for {
		select {
		case <-m.stop:
			return nil
		case updated := <-m.update:
			s.Send(&pluginapi.ListAndWatchResponse{Devices: updated})
		}
	}
}

// Update allows the device plugin to send new devices through ListAndWatch
func (m *Stub) Update(devs []*pluginapi.Device) {
	m.update <- devs
}

// Allocate does a mock allocation
func (m *Stub) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	log.Printf("Allocate, %+v", r)

	var response pluginapi.AllocateResponse
	return &response, nil
}

func (m *Stub) cleanup() error {
	if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
		return err
	}

	return nil
}
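The Stub above is what the unit tests later in this commit drive. A minimal sketch of that flow, using only the exported Stub methods defined in this file; the socket path, resource name, and device IDs are illustrative placeholders, not values taken from this commit:

	// Illustrative only: exercises the Stub API from device_plugin_stub.go.
	devs := []*pluginapi.Device{
		{ID: "Dev1", Health: pluginapi.Healthy},
		{ID: "Dev2", Health: pluginapi.Healthy},
	}
	p := NewDevicePluginStub(devs, "/tmp/device_plugin/device-plugin.sock") // hypothetical plugin socket
	if err := p.Start(); err != nil {                                       // serve the DevicePlugin gRPC API
		log.Fatal(err)
	}
	// Register with the kubelet registration socket so the manager dials back to us.
	if err := p.Register(pluginapi.KubeletSocket, "vendor-domain/resource"); err != nil {
		log.Fatal(err)
	}
	// Push an updated device list through ListAndWatch, then shut down.
	p.Update([]*pluginapi.Device{{ID: "Dev1", Health: pluginapi.Unhealthy}})
	p.Stop()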
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/endpoint.go (generated, vendored, new file, 198 lines)
@@ -0,0 +1,198 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/golang/glog"
	"golang.org/x/net/context"
	"google.golang.org/grpc"

	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
)

// endpoint maps to a single registered device plugin. It is responsible
// for managing gRPC communications with the device plugin and caching
// device states reported by the device plugin.
type endpoint interface {
	run()
	stop()
	allocate(devs []string) (*pluginapi.AllocateResponse, error)
	getDevices() []pluginapi.Device
	callback(resourceName string, added, updated, deleted []pluginapi.Device)
}

type endpointImpl struct {
	client     pluginapi.DevicePluginClient
	clientConn *grpc.ClientConn

	socketPath   string
	resourceName string

	devices map[string]pluginapi.Device
	mutex   sync.Mutex

	cb monitorCallback
}

// newEndpoint creates a new endpoint for the given resourceName.
func newEndpointImpl(socketPath, resourceName string, devices map[string]pluginapi.Device, callback monitorCallback) (*endpointImpl, error) {
	client, c, err := dial(socketPath)
	if err != nil {
		glog.Errorf("Can't create new endpoint with path %s err %v", socketPath, err)
		return nil, err
	}

	return &endpointImpl{
		client:     client,
		clientConn: c,

		socketPath:   socketPath,
		resourceName: resourceName,

		devices: devices,
		cb:      callback,
	}, nil
}

func (e *endpointImpl) callback(resourceName string, added, updated, deleted []pluginapi.Device) {
	e.cb(resourceName, added, updated, deleted)
}

func (e *endpointImpl) getDevices() []pluginapi.Device {
	e.mutex.Lock()
	defer e.mutex.Unlock()
	var devs []pluginapi.Device

	for _, d := range e.devices {
		devs = append(devs, d)
	}

	return devs
}

// run initializes ListAndWatch gRPC call for the device plugin and
// blocks on receiving ListAndWatch gRPC stream updates. Each ListAndWatch
// stream update contains a new list of device states. listAndWatch compares the new
// device states with its cached states to get list of new, updated, and deleted devices.
// It then issues a callback to pass this information to the device manager which
// will adjust the resource available information accordingly.
func (e *endpointImpl) run() {
	stream, err := e.client.ListAndWatch(context.Background(), &pluginapi.Empty{})
	if err != nil {
		glog.Errorf(errListAndWatch, e.resourceName, err)

		return
	}

	devices := make(map[string]pluginapi.Device)

	e.mutex.Lock()
	for _, d := range e.devices {
		devices[d.ID] = d
	}
	e.mutex.Unlock()

	for {
		response, err := stream.Recv()
		if err != nil {
			glog.Errorf(errListAndWatch, e.resourceName, err)
			return
		}

		devs := response.Devices
		glog.V(2).Infof("State pushed for device plugin %s", e.resourceName)

		newDevs := make(map[string]*pluginapi.Device)
		var added, updated []pluginapi.Device

		for _, d := range devs {
			dOld, ok := devices[d.ID]
			newDevs[d.ID] = d

			if !ok {
				glog.V(2).Infof("New device for Endpoint %s: %v", e.resourceName, d)

				devices[d.ID] = *d
				added = append(added, *d)

				continue
			}

			if d.Health == dOld.Health {
				continue
			}

			if d.Health == pluginapi.Unhealthy {
				glog.Errorf("Device %s is now Unhealthy", d.ID)
			} else if d.Health == pluginapi.Healthy {
				glog.V(2).Infof("Device %s is now Healthy", d.ID)
			}

			devices[d.ID] = *d
			updated = append(updated, *d)
		}

		var deleted []pluginapi.Device
		for id, d := range devices {
			if _, ok := newDevs[id]; ok {
				continue
			}

			glog.Errorf("Device %s was deleted", d.ID)

			deleted = append(deleted, d)
			delete(devices, id)
		}

		e.mutex.Lock()
		e.devices = devices
		e.mutex.Unlock()

		e.callback(e.resourceName, added, updated, deleted)
	}
}

// allocate issues Allocate gRPC call to the device plugin.
func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
	return e.client.Allocate(context.Background(), &pluginapi.AllocateRequest{
		DevicesIDs: devs,
	})
}

func (e *endpointImpl) stop() {
	e.clientConn.Close()
}

// dial establishes the gRPC communication with the registered device plugin.
func dial(unixSocketPath string) (pluginapi.DevicePluginClient, *grpc.ClientConn, error) {
	c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}),
	)

	if err != nil {
		return nil, nil, fmt.Errorf(errFailedToDialDevicePlugin+" %v", err)
	}

	return pluginapi.NewDevicePluginClient(c), c, nil
}
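The diffing done inside run() is easiest to see on a concrete update; the device IDs below are illustrative, and the same scenario is exercised by TestRun in endpoint_test.go further down:

	// Cached state:  {"Dev1": Healthy, "Dev2": Healthy}
	// Stream update: ["Dev1": Unhealthy, "Dev3": Healthy]
	// Resulting callback arguments:
	//   added   = [Dev3]   // ID not present in the cache
	//   updated = [Dev1]   // present in the cache, but Health changed
	//   deleted = [Dev2]   // present in the cache, missing from the update
	e.callback(e.resourceName, added, updated, deleted)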
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/endpoint_test.go (generated, vendored, new file, 114 lines)
@@ -0,0 +1,114 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"path"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
)

var (
	esocketName = "mock.sock"
)

func TestNewEndpoint(t *testing.T) {
	socket := path.Join("/tmp", esocketName)

	devs := []*pluginapi.Device{
		{ID: "ADeviceId", Health: pluginapi.Healthy},
	}

	p, e := esetup(t, devs, socket, "mock", func(n string, a, u, r []pluginapi.Device) {})
	defer ecleanup(t, p, e)
}

func TestRun(t *testing.T) {
	socket := path.Join("/tmp", esocketName)

	devs := []*pluginapi.Device{
		{ID: "ADeviceId", Health: pluginapi.Healthy},
		{ID: "AnotherDeviceId", Health: pluginapi.Healthy},
	}

	updated := []*pluginapi.Device{
		{ID: "ADeviceId", Health: pluginapi.Unhealthy},
		{ID: "AThirdDeviceId", Health: pluginapi.Healthy},
	}

	p, e := esetup(t, devs, socket, "mock", func(n string, a, u, r []pluginapi.Device) {
		require.Len(t, a, 1)
		require.Len(t, u, 1)
		require.Len(t, r, 1)

		require.Equal(t, a[0].ID, updated[1].ID)

		require.Equal(t, u[0].ID, updated[0].ID)
		require.Equal(t, u[0].Health, updated[0].Health)

		require.Equal(t, r[0].ID, devs[1].ID)
	})
	defer ecleanup(t, p, e)

	go e.run()
	p.Update(updated)
	time.Sleep(time.Second)

	e.mutex.Lock()
	defer e.mutex.Unlock()

	require.Len(t, e.devices, 2)
	for _, dref := range updated {
		d, ok := e.devices[dref.ID]

		require.True(t, ok)
		require.Equal(t, d.ID, dref.ID)
		require.Equal(t, d.Health, dref.Health)
	}

}

func TestGetDevices(t *testing.T) {
	e := endpointImpl{
		devices: map[string]pluginapi.Device{
			"ADeviceId": {ID: "ADeviceId", Health: pluginapi.Healthy},
		},
	}
	devs := e.getDevices()
	require.Len(t, devs, 1)
}

func esetup(t *testing.T, devs []*pluginapi.Device, socket, resourceName string, callback monitorCallback) (*Stub, *endpointImpl) {
	p := NewDevicePluginStub(devs, socket)

	err := p.Start()
	require.NoError(t, err)

	e, err := newEndpointImpl(socket, "mock", make(map[string]pluginapi.Device), func(n string, a, u, r []pluginapi.Device) {})
	require.NoError(t, err)

	return p, e
}

func ecleanup(t *testing.T, p *Stub, e *endpointImpl) {
	p.Stop()
	e.stop()
}
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/manager.go (generated, vendored, new file, 646 lines)
@@ -0,0 +1,646 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/golang/glog"
	"golang.org/x/net/context"
	"google.golang.org/grpc"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/util/sets"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod

// monitorCallback is the function called when a device's health state changes,
// or new devices are reported, or old devices are deleted.
// Updated contains the most recent state of the Device.
type monitorCallback func(resourceName string, added, updated, deleted []pluginapi.Device)

// ManagerImpl is the structure in charge of managing Device Plugins.
type ManagerImpl struct {
	socketname string
	socketdir  string

	endpoints map[string]endpoint // Key is ResourceName
	mutex     sync.Mutex

	server *grpc.Server

	// activePods is a method for listing active pods on the node
	// so the amount of pluginResources requested by existing pods
	// could be counted when updating allocated devices
	activePods ActivePodsFunc

	// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
	// We use it to determine when we can purge inactive pods from checkpointed state.
	sourcesReady config.SourcesReady

	// callback is used for updating devices' states in one time call.
	// e.g. a new device is advertised, two old devices are deleted and a running device fails.
	callback monitorCallback

	// allDevices contains all of registered resourceNames and their exported device IDs.
	allDevices map[string]sets.String

	// allocatedDevices contains allocated deviceIds, keyed by resourceName.
	allocatedDevices map[string]sets.String

	// podDevices contains pod to allocated device mapping.
	podDevices podDevices
}

type sourcesReadyStub struct{}

func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool          { return true }

// NewManagerImpl creates a new manager.
func NewManagerImpl() (*ManagerImpl, error) {
	return newManagerImpl(pluginapi.KubeletSocket)
}

func newManagerImpl(socketPath string) (*ManagerImpl, error) {
	glog.V(2).Infof("Creating Device Plugin manager at %s", socketPath)

	if socketPath == "" || !filepath.IsAbs(socketPath) {
		return nil, fmt.Errorf(errBadSocket+" %v", socketPath)
	}

	dir, file := filepath.Split(socketPath)
	manager := &ManagerImpl{
		endpoints:        make(map[string]endpoint),
		socketname:       file,
		socketdir:        dir,
		allDevices:       make(map[string]sets.String),
		allocatedDevices: make(map[string]sets.String),
		podDevices:       make(podDevices),
	}
	manager.callback = manager.genericDeviceUpdateCallback

	// The following structs are populated with real implementations in manager.Start()
	// Before that, initializes them to perform no-op operations.
	manager.activePods = func() []*v1.Pod { return []*v1.Pod{} }
	manager.sourcesReady = &sourcesReadyStub{}

	return manager, nil
}

func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, added, updated, deleted []pluginapi.Device) {
	kept := append(updated, added...)
	m.mutex.Lock()
	if _, ok := m.allDevices[resourceName]; !ok {
		m.allDevices[resourceName] = sets.NewString()
	}
	// For now, Manager only keeps track of healthy devices.
	// TODO: adds support to track unhealthy devices.
	for _, dev := range kept {
		if dev.Health == pluginapi.Healthy {
			m.allDevices[resourceName].Insert(dev.ID)
		} else {
			m.allDevices[resourceName].Delete(dev.ID)
		}
	}
	for _, dev := range deleted {
		m.allDevices[resourceName].Delete(dev.ID)
	}
	m.mutex.Unlock()
	m.writeCheckpoint()
}

func (m *ManagerImpl) removeContents(dir string) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	names, err := d.Readdirnames(-1)
	if err != nil {
		return err
	}
	for _, name := range names {
		filePath := filepath.Join(dir, name)
		if filePath == m.checkpointFile() {
			continue
		}
		stat, err := os.Stat(filePath)
		if err != nil {
			glog.Errorf("Failed to stat file %v: %v", filePath, err)
			continue
		}
		if stat.IsDir() {
			continue
		}
		err = os.RemoveAll(filePath)
		if err != nil {
			return err
		}
	}
	return nil
}

const (
	// kubeletDevicePluginCheckpoint is the file name of device plugin checkpoint
	kubeletDevicePluginCheckpoint = "kubelet_internal_checkpoint"
)

// checkpointFile returns device plugin checkpoint file path.
func (m *ManagerImpl) checkpointFile() string {
	return filepath.Join(m.socketdir, kubeletDevicePluginCheckpoint)
}

// Start starts the Device Plugin Manager amd start initialization of
// podDevices and allocatedDevices information from checkpoint-ed state and
// starts device plugin registration service.
func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
	glog.V(2).Infof("Starting Device Plugin manager")

	m.activePods = activePods
	m.sourcesReady = sourcesReady

	// Loads in allocatedDevices information from disk.
	err := m.readCheckpoint()
	if err != nil {
		glog.Warningf("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date. Err: %v", err)
	}

	socketPath := filepath.Join(m.socketdir, m.socketname)
	os.MkdirAll(m.socketdir, 0755)

	// Removes all stale sockets in m.socketdir. Device plugins can monitor
	// this and use it as a signal to re-register with the new Kubelet.
	if err := m.removeContents(m.socketdir); err != nil {
		glog.Errorf("Fail to clean up stale contents under %s: %+v", m.socketdir, err)
	}

	s, err := net.Listen("unix", socketPath)
	if err != nil {
		glog.Errorf(errListenSocket+" %+v", err)
		return err
	}

	m.server = grpc.NewServer([]grpc.ServerOption{}...)

	pluginapi.RegisterRegistrationServer(m.server, m)
	go m.server.Serve(s)

	glog.V(2).Infof("Serving device plugin registration server on %q", socketPath)

	return nil
}

// Devices is the map of devices that are known by the Device
// Plugin manager with the kind of the devices as key
func (m *ManagerImpl) Devices() map[string][]pluginapi.Device {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	devs := make(map[string][]pluginapi.Device)
	for k, e := range m.endpoints {
		glog.V(3).Infof("Endpoint: %+v: %p", k, e)
		devs[k] = e.getDevices()
	}

	return devs
}

// Allocate is the call that you can use to allocate a set of devices
// from the registered device plugins.
func (m *ManagerImpl) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
	pod := attrs.Pod
	devicesToReuse := make(map[string]sets.String)
	// TODO: Reuse devices between init containers and regular containers.
	for _, container := range pod.Spec.InitContainers {
		if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
			return err
		}
		m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
	}
	for _, container := range pod.Spec.Containers {
		if err := m.allocateContainerResources(pod, &container, devicesToReuse); err != nil {
			return err
		}
		m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, devicesToReuse)
	}

	m.mutex.Lock()
	defer m.mutex.Unlock()

	// quick return if no pluginResources requested
	if _, podRequireDevicePluginResource := m.podDevices[string(pod.UID)]; !podRequireDevicePluginResource {
		return nil
	}

	m.sanitizeNodeAllocatable(node)
	return nil
}

// Register registers a device plugin.
func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) {
	glog.Infof("Got registration request from device plugin with resource name %q", r.ResourceName)
	metrics.DevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
	if r.Version != pluginapi.Version {
		errorString := fmt.Sprintf(errUnsuportedVersion, r.Version, pluginapi.Version)
		glog.Infof("Bad registration request from device plugin with resource name %q: %v", r.ResourceName, errorString)
		return &pluginapi.Empty{}, fmt.Errorf(errorString)
	}

	if !v1helper.IsExtendedResourceName(v1.ResourceName(r.ResourceName)) {
		errorString := fmt.Sprintf(errInvalidResourceName, r.ResourceName)
		glog.Infof("Bad registration request from device plugin: %v", errorString)
		return &pluginapi.Empty{}, fmt.Errorf(errorString)
	}

	// TODO: for now, always accepts newest device plugin. Later may consider to
	// add some policies here, e.g., verify whether an old device plugin with the
	// same resource name is still alive to determine whether we want to accept
	// the new registration.
	go m.addEndpoint(r)

	return &pluginapi.Empty{}, nil
}

// Stop is the function that can stop the gRPC server.
func (m *ManagerImpl) Stop() error {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	for _, e := range m.endpoints {
		e.stop()
	}

	m.server.Stop()
	return nil
}

func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
	existingDevs := make(map[string]pluginapi.Device)
	m.mutex.Lock()
	old, ok := m.endpoints[r.ResourceName]
	if ok && old != nil {
		// Pass devices of previous endpoint into re-registered one,
		// to avoid potential orphaned devices upon re-registration
		devices := make(map[string]pluginapi.Device)
		for _, device := range old.getDevices() {
			devices[device.ID] = device
		}
		existingDevs = devices
	}
	m.mutex.Unlock()

	socketPath := filepath.Join(m.socketdir, r.Endpoint)
	e, err := newEndpointImpl(socketPath, r.ResourceName, existingDevs, m.callback)
	if err != nil {
		glog.Errorf("Failed to dial device plugin with request %v: %v", r, err)
		return
	}

	m.mutex.Lock()
	// Check for potential re-registration during the initialization of new endpoint,
	// and skip updating if re-registration happens.
	// TODO: simplify the part once we have a better way to handle registered devices
	ext := m.endpoints[r.ResourceName]
	if ext != old {
		glog.Warningf("Some other endpoint %v is added while endpoint %v is initialized", ext, e)
		m.mutex.Unlock()
		e.stop()
		return
	}
	// Associates the newly created endpoint with the corresponding resource name.
	// Stops existing endpoint if there is any.
	m.endpoints[r.ResourceName] = e
	glog.V(2).Infof("Registered endpoint %v", e)
	m.mutex.Unlock()

	if old != nil {
		old.stop()
	}

	go func() {
		e.run()
		e.stop()

		m.mutex.Lock()
		if old, ok := m.endpoints[r.ResourceName]; ok && old == e {
			glog.V(2).Infof("Delete resource for endpoint %v", e)
			delete(m.endpoints, r.ResourceName)
		}

		glog.V(2).Infof("Unregistered endpoint %v", e)
		m.mutex.Unlock()
	}()
}

// GetCapacity is expected to be called when Kubelet updates its node status.
// The first returned variable contains the registered device plugin resource capacity.
// The second returned variable contains previously registered resources that are no longer active.
// Kubelet uses this information to update resource capacity/allocatable in its node status.
// After the call, device plugin can remove the inactive resources from its internal list as the
// change is already reflected in Kubelet node status.
// Note in the special case after Kubelet restarts, device plugin resource capacities can
// temporarily drop to zero till corresponding device plugins re-register. This is OK because
// cm.UpdatePluginResource() run during predicate Admit guarantees we adjust nodeinfo
// capacity for already allocated pods so that they can continue to run. However, new pods
// requiring device plugin resources will not be scheduled till device plugin re-registers.
func (m *ManagerImpl) GetCapacity() (v1.ResourceList, []string) {
	needsUpdateCheckpoint := false
	var capacity = v1.ResourceList{}
	var deletedResources []string
	m.mutex.Lock()
	for resourceName, devices := range m.allDevices {
		if _, ok := m.endpoints[resourceName]; !ok {
			delete(m.allDevices, resourceName)
			deletedResources = append(deletedResources, resourceName)
			needsUpdateCheckpoint = true
		} else {
			capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
		}
	}
	m.mutex.Unlock()
	if needsUpdateCheckpoint {
		m.writeCheckpoint()
	}
	return capacity, deletedResources
}

// checkpointData struct is used to store pod to device allocation information
// and registered device information in a checkpoint file.
// TODO: add version control when we need to change checkpoint format.
type checkpointData struct {
	PodDeviceEntries  []podDevicesCheckpointEntry
	RegisteredDevices map[string][]string
}

// Checkpoints device to container allocation information to disk.
func (m *ManagerImpl) writeCheckpoint() error {
	m.mutex.Lock()
	data := checkpointData{
		PodDeviceEntries:  m.podDevices.toCheckpointData(),
		RegisteredDevices: make(map[string][]string),
	}
	for resource, devices := range m.allDevices {
		data.RegisteredDevices[resource] = devices.UnsortedList()
	}
	m.mutex.Unlock()

	dataJSON, err := json.Marshal(data)
	if err != nil {
		return err
	}
	filepath := m.checkpointFile()
	return ioutil.WriteFile(filepath, dataJSON, 0644)
}

// Reads device to container allocation information from disk, and populates
// m.allocatedDevices accordingly.
func (m *ManagerImpl) readCheckpoint() error {
	filepath := m.checkpointFile()
	content, err := ioutil.ReadFile(filepath)
	if err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to read checkpoint file %q: %v", filepath, err)
	}
	glog.V(2).Infof("Read checkpoint file %s\n", filepath)
	var data checkpointData
	if err := json.Unmarshal(content, &data); err != nil {
		return fmt.Errorf("failed to unmarshal checkpoint data: %v", err)
	}

	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.podDevices.fromCheckpointData(data.PodDeviceEntries)
	m.allocatedDevices = m.podDevices.devices()
	for resource, devices := range data.RegisteredDevices {
		m.allDevices[resource] = sets.NewString()
		for _, dev := range devices {
			m.allDevices[resource].Insert(dev)
		}
	}
	return nil
}

// updateAllocatedDevices gets a list of active pods and then frees any Devices that are bound to
// terminated pods. Returns error on failure.
func (m *ManagerImpl) updateAllocatedDevices(activePods []*v1.Pod) {
	if !m.sourcesReady.AllReady() {
		return
	}
	m.mutex.Lock()
	defer m.mutex.Unlock()
	activePodUids := sets.NewString()
	for _, pod := range activePods {
		activePodUids.Insert(string(pod.UID))
	}
	allocatedPodUids := m.podDevices.pods()
	podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
	if len(podsToBeRemoved) <= 0 {
		return
	}
	glog.V(3).Infof("pods to be removed: %v", podsToBeRemoved.List())
	m.podDevices.delete(podsToBeRemoved.List())
	// Regenerated allocatedDevices after we update pod allocation information.
	m.allocatedDevices = m.podDevices.devices()
}

// Returns list of device Ids we need to allocate with Allocate rpc call.
// Returns empty list in case we don't need to issue the Allocate rpc call.
func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, required int, reusableDevices sets.String) (sets.String, error) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	needed := required
	// Gets list of devices that have already been allocated.
	// This can happen if a container restarts for example.
	devices := m.podDevices.containerDevices(podUID, contName, resource)
	if devices != nil {
		glog.V(3).Infof("Found pre-allocated devices for resource %s container %q in Pod %q: %v", resource, contName, podUID, devices.List())
		needed = needed - devices.Len()
		// A pod's resource is not expected to change once admitted by the API server,
		// so just fail loudly here. We can revisit this part if this no longer holds.
		if needed != 0 {
			return nil, fmt.Errorf("pod %v container %v changed request for resource %v from %v to %v", podUID, contName, resource, devices.Len(), required)
		}
	}
	if needed == 0 {
		// No change, no work.
		return nil, nil
	}
	glog.V(3).Infof("Needs to allocate %v %v for pod %q container %q", needed, resource, podUID, contName)
	// Needs to allocate additional devices.
	if _, ok := m.allDevices[resource]; !ok {
		return nil, fmt.Errorf("can't allocate unregistered device %v", resource)
	}
	devices = sets.NewString()
	// Allocates from reusableDevices list first.
	for device := range reusableDevices {
		devices.Insert(device)
		needed--
		if needed == 0 {
			return devices, nil
		}
	}
	// Needs to allocate additional devices.
	if m.allocatedDevices[resource] == nil {
		m.allocatedDevices[resource] = sets.NewString()
	}
	// Gets Devices in use.
	devicesInUse := m.allocatedDevices[resource]
	// Gets a list of available devices.
	available := m.allDevices[resource].Difference(devicesInUse)
	if int(available.Len()) < needed {
		return nil, fmt.Errorf("requested number of devices unavailable for %s. Requested: %d, Available: %d", resource, needed, available.Len())
	}
	allocated := available.UnsortedList()[:needed]
	// Updates m.allocatedDevices with allocated devices to prevent them
	// from being allocated to other pods/containers, given that we are
	// not holding lock during the rpc call.
	for _, device := range allocated {
		m.allocatedDevices[resource].Insert(device)
		devices.Insert(device)
	}
	return devices, nil
}

// allocateContainerResources attempts to allocate all of required device
// plugin resources for the input container, issues an Allocate rpc request
// for each new device resource requirement, processes their AllocateResponses,
// and updates the cached containerDevices on success.
func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Container, devicesToReuse map[string]sets.String) error {
	podUID := string(pod.UID)
	contName := container.Name
	allocatedDevicesUpdated := false
	for k, v := range container.Resources.Limits {
		resource := string(k)
		needed := int(v.Value())
		glog.V(3).Infof("needs %d %s", needed, resource)
		_, registeredResource := m.allDevices[resource]
		_, allocatedResource := m.allocatedDevices[resource]
		// Continues if this is neither an active device plugin resource nor
		// a resource we have previously allocated.
		if !registeredResource && !allocatedResource {
			continue
		}
		// Updates allocatedDevices to garbage collect any stranded resources
		// before doing the device plugin allocation.
		if !allocatedDevicesUpdated {
			m.updateAllocatedDevices(m.activePods())
			allocatedDevicesUpdated = true
		}
		allocDevices, err := m.devicesToAllocate(podUID, contName, resource, needed, devicesToReuse[resource])
		if err != nil {
			return err
		}
		if allocDevices == nil || len(allocDevices) <= 0 {
			continue
		}
		startRPCTime := time.Now()
		// devicePluginManager.Allocate involves RPC calls to device plugin, which
		// could be heavy-weight. Therefore we want to perform this operation outside
		// mutex lock. Note if Allocate call fails, we may leave container resources
		// partially allocated for the failed container. We rely on updateAllocatedDevices()
		// to garbage collect these resources later. Another side effect is that if
		// we have X resource A and Y resource B in total, and two containers, container1
		// and container2 both require X resource A and Y resource B. Both allocation
		// requests may fail if we serve them in mixed order.
		// TODO: may revisit this part later if we see inefficient resource allocation
		// in real use as the result of this. Should also consider to parallize device
		// plugin Allocate grpc calls if it becomes common that a container may require
		// resources from multiple device plugins.
		m.mutex.Lock()
		e, ok := m.endpoints[resource]
		m.mutex.Unlock()
		if !ok {
			m.mutex.Lock()
			m.allocatedDevices = m.podDevices.devices()
			m.mutex.Unlock()
			return fmt.Errorf("Unknown Device Plugin %s", resource)
		}

		devs := allocDevices.UnsortedList()
		glog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
		resp, err := e.allocate(devs)
		metrics.DevicePluginAllocationLatency.WithLabelValues(resource).Observe(metrics.SinceInMicroseconds(startRPCTime))
		if err != nil {
			// In case of allocation failure, we want to restore m.allocatedDevices
			// to the actual allocated state from m.podDevices.
			m.mutex.Lock()
			m.allocatedDevices = m.podDevices.devices()
			m.mutex.Unlock()
			return err
		}

		// Update internal cached podDevices state.
		m.mutex.Lock()
		m.podDevices.insert(podUID, contName, resource, allocDevices, resp)
		m.mutex.Unlock()
	}

	// Checkpoints device to container allocation information.
	return m.writeCheckpoint()
}

// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	return m.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name)
}

// sanitizeNodeAllocatable scans through allocatedDevices in the device manager
// and if necessary, updates allocatableResource in nodeInfo to at least equal to
// the allocated capacity. This allows pods that have already been scheduled on
// the node to pass GeneralPredicates admission checking even upon device plugin failure.
func (m *ManagerImpl) sanitizeNodeAllocatable(node *schedulercache.NodeInfo) {
	var newAllocatableResource *schedulercache.Resource
	allocatableResource := node.AllocatableResource()
	if allocatableResource.ScalarResources == nil {
		allocatableResource.ScalarResources = make(map[v1.ResourceName]int64)
	}
	for resource, devices := range m.allocatedDevices {
		needed := devices.Len()
		quant, ok := allocatableResource.ScalarResources[v1.ResourceName(resource)]
		if ok && int(quant) >= needed {
			continue
		}
		// Needs to update nodeInfo.AllocatableResource to make sure
		// NodeInfo.allocatableResource at least equal to the capacity already allocated.
		if newAllocatableResource == nil {
			newAllocatableResource = allocatableResource.Clone()
		}
		newAllocatableResource.ScalarResources[v1.ResourceName(resource)] = int64(needed)
	}
	if newAllocatableResource != nil {
		node.SetAllocatableResource(newAllocatableResource)
	}
}
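For orientation, a rough sketch of how the manager's exported surface fits together, mirroring what the tests below do; this is illustrative only and not part of the vendored commit (the socket path is a placeholder, and the kubelet supplies the real activePods lister and sources-ready implementation):

	// Illustrative only: wiring the device plugin manager, as the tests below do.
	m, err := newManagerImpl("/var/lib/kubelet/device-plugins/kubelet.sock") // hypothetical socket path
	if err != nil {
		log.Fatal(err)
	}
	activePods := func() []*v1.Pod { return []*v1.Pod{} } // stand-in for the kubelet's pod lister
	if err := m.Start(activePods, &sourcesReadyStub{}); err != nil {
		log.Fatal(err)
	}
	defer m.Stop()
	// Registered plugin capacities feed the node status; Allocate and
	// GetDeviceRunContainerOptions are called per pod admission / container start.
	capacity, removed := m.GetCapacity()
	fmt.Println(capacity, removed)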
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/manager_stub.go (generated, vendored, new file, 63 lines)
@@ -0,0 +1,63 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"k8s.io/api/core/v1"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// ManagerStub provides a simple stub implementation for the Device Manager.
type ManagerStub struct{}

// NewManagerStub creates a ManagerStub.
func NewManagerStub() (*ManagerStub, error) {
	return &ManagerStub{}, nil
}

// Start simply returns nil.
func (h *ManagerStub) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
	return nil
}

// Stop simply returns nil.
func (h *ManagerStub) Stop() error {
	return nil
}

// Devices returns an empty map.
func (h *ManagerStub) Devices() map[string][]pluginapi.Device {
	return make(map[string][]pluginapi.Device)
}

// Allocate simply returns nil.
func (h *ManagerStub) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
	return nil
}

// GetDeviceRunContainerOptions simply returns nil.
func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
	return nil
}

// GetCapacity simply returns nil capacity and empty removed resource list.
func (h *ManagerStub) GetCapacity() (v1.ResourceList, []string) {
	return nil, []string{}
}
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/manager_test.go (generated, vendored, new file, 658 lines)
@@ -0,0 +1,658 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"reflect"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

const (
	socketName       = "/tmp/device_plugin/server.sock"
	pluginSocketName = "/tmp/device_plugin/device-plugin.sock"
	testResourceName = "fake-domain/resource"
)

func TestNewManagerImpl(t *testing.T) {
	_, err := newManagerImpl(socketName)
	require.NoError(t, err)
}

func TestNewManagerImplStart(t *testing.T) {
	m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {})
	cleanup(t, m, p)
}

// Tests that the device plugin manager correctly handles registration and re-registration by
// making sure that after registration, devices are correctly updated and if a re-registration
// happens, we will NOT delete devices; and no orphaned devices left.
func TestDevicePluginReRegistration(t *testing.T) {
	devs := []*pluginapi.Device{
		{ID: "Dev1", Health: pluginapi.Healthy},
		{ID: "Dev2", Health: pluginapi.Healthy},
	}
	devsForRegistration := []*pluginapi.Device{
		{ID: "Dev3", Health: pluginapi.Healthy},
	}

	callbackCount := 0
	callbackChan := make(chan int)
	var stopping int32
	stopping = 0
	callback := func(n string, a, u, r []pluginapi.Device) {
		// Should be called three times, one for each plugin registration, till we are stopping.
		if callbackCount > 2 && atomic.LoadInt32(&stopping) <= 0 {
			t.FailNow()
		}
		callbackCount++
		callbackChan <- callbackCount
	}
	m, p1 := setup(t, devs, callback)
	p1.Register(socketName, testResourceName)
	// Wait for the first callback to be issued.

	<-callbackChan
	// Wait till the endpoint is added to the manager.
	for i := 0; i < 20; i++ {
		if len(m.Devices()) > 0 {
			break
		}
		time.Sleep(1)
	}
	devices := m.Devices()
	require.Equal(t, 2, len(devices[testResourceName]), "Devices are not updated.")

	p2 := NewDevicePluginStub(devs, pluginSocketName+".new")
	err := p2.Start()
	require.NoError(t, err)
	p2.Register(socketName, testResourceName)
	// Wait for the second callback to be issued.
	<-callbackChan

	devices2 := m.Devices()
	require.Equal(t, 2, len(devices2[testResourceName]), "Devices shouldn't change.")

	// Test the scenario that a plugin re-registers with different devices.
	p3 := NewDevicePluginStub(devsForRegistration, pluginSocketName+".third")
	err = p3.Start()
	require.NoError(t, err)
	p3.Register(socketName, testResourceName)
	// Wait for the second callback to be issued.
	<-callbackChan

	devices3 := m.Devices()
	require.Equal(t, 1, len(devices3[testResourceName]), "Devices of plugin previously registered should be removed.")
	// Wait long enough to catch unexpected callbacks.
	time.Sleep(5 * time.Second)

	atomic.StoreInt32(&stopping, 1)
	p2.Stop()
	p3.Stop()
	cleanup(t, m, p1)

}

func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback) (Manager, *Stub) {
	m, err := newManagerImpl(socketName)
	require.NoError(t, err)

	m.callback = callback

	activePods := func() []*v1.Pod {
		return []*v1.Pod{}
	}
	err = m.Start(activePods, &sourcesReadyStub{})
	require.NoError(t, err)

	p := NewDevicePluginStub(devs, pluginSocketName)
	err = p.Start()
	require.NoError(t, err)

	return m, p
}

func cleanup(t *testing.T, m Manager, p *Stub) {
	p.Stop()
	m.Stop()
}

func TestUpdateCapacity(t *testing.T) {
	testManager, err := newManagerImpl(socketName)
	as := assert.New(t)
	as.NotNil(testManager)
	as.Nil(err)

	devs := []pluginapi.Device{
		{ID: "Device1", Health: pluginapi.Healthy},
		{ID: "Device2", Health: pluginapi.Healthy},
		{ID: "Device3", Health: pluginapi.Unhealthy},
	}
	callback := testManager.genericDeviceUpdateCallback

	// Adds three devices for resource1, two healthy and one unhealthy.
	// Expects capacity for resource1 to be 2.
	resourceName1 := "domain1.com/resource1"
	testManager.endpoints[resourceName1] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
	callback(resourceName1, devs, []pluginapi.Device{}, []pluginapi.Device{})
	capacity, removedResources := testManager.GetCapacity()
	resource1Capacity, ok := capacity[v1.ResourceName(resourceName1)]
	as.True(ok)
	as.Equal(int64(2), resource1Capacity.Value())
	as.Equal(0, len(removedResources))

	// Deletes an unhealthy device should NOT change capacity.
	callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[2]})
	capacity, removedResources = testManager.GetCapacity()
	resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
	as.True(ok)
	as.Equal(int64(2), resource1Capacity.Value())
	as.Equal(0, len(removedResources))

	// Updates a healthy device to unhealthy should reduce capacity by 1.
	dev2 := devs[1]
	dev2.Health = pluginapi.Unhealthy
	callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{dev2}, []pluginapi.Device{})
	capacity, removedResources = testManager.GetCapacity()
	resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
	as.True(ok)
	as.Equal(int64(1), resource1Capacity.Value())
	as.Equal(0, len(removedResources))

	// Deletes a healthy device should reduce capacity by 1.
	callback(resourceName1, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[0]})
	capacity, removedResources = testManager.GetCapacity()
	resource1Capacity, ok = capacity[v1.ResourceName(resourceName1)]
	as.True(ok)
	as.Equal(int64(0), resource1Capacity.Value())
	as.Equal(0, len(removedResources))

	// Tests adding another resource.
	resourceName2 := "resource2"
	testManager.endpoints[resourceName2] = &endpointImpl{devices: make(map[string]pluginapi.Device)}
	callback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
	capacity, removedResources = testManager.GetCapacity()
	as.Equal(2, len(capacity))
	resource2Capacity, ok := capacity[v1.ResourceName(resourceName2)]
	as.True(ok)
	as.Equal(int64(2), resource2Capacity.Value())
	as.Equal(0, len(removedResources))

	// Removes resourceName1 endpoint. Verifies testManager.GetCapacity() reports that resourceName1
	// is removed from capacity and it no longer exists in allDevices after the call.
	delete(testManager.endpoints, resourceName1)
	capacity, removed := testManager.GetCapacity()
	as.Equal([]string{resourceName1}, removed)
	_, ok = capacity[v1.ResourceName(resourceName1)]
	as.False(ok)
	val, ok := capacity[v1.ResourceName(resourceName2)]
	as.True(ok)
	as.Equal(int64(2), val.Value())
	_, ok = testManager.allDevices[resourceName1]
	as.False(ok)
}

type stringPairType struct {
	value1 string
	value2 string
}

func constructDevices(devices []string) sets.String {
	ret := sets.NewString()
	for _, dev := range devices {
		ret.Insert(dev)
	}
	return ret
}

func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.AllocateResponse {
	resp := &pluginapi.AllocateResponse{}
	for k, v := range devices {
		resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
			HostPath:      k,
			ContainerPath: v,
			Permissions:   "mrw",
		})
	}
	for k, v := range mounts {
		resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
			ContainerPath: k,
			HostPath:      v,
			ReadOnly:      true,
		})
	}
	resp.Envs = make(map[string]string)
	for k, v := range envs {
		resp.Envs[k] = v
	}
	return resp
}

func TestCheckpoint(t *testing.T) {
	resourceName1 := "domain1.com/resource1"
	resourceName2 := "domain2.com/resource2"

	as := assert.New(t)
	tmpDir, err := ioutil.TempDir("", "checkpoint")
	as.Nil(err)
	defer os.RemoveAll(tmpDir)
	testManager := &ManagerImpl{
		socketdir:        tmpDir,
		allDevices:       make(map[string]sets.String),
		allocatedDevices: make(map[string]sets.String),
		podDevices:       make(podDevices),
	}

	testManager.podDevices.insert("pod1", "con1", resourceName1,
		constructDevices([]string{"dev1", "dev2"}),
		constructAllocResp(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"},
			map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
	testManager.podDevices.insert("pod1", "con1", resourceName2,
		constructDevices([]string{"dev1", "dev2"}),
		constructAllocResp(map[string]string{"/dev/r2dev1": "/dev/r2dev1", "/dev/r2dev2": "/dev/r2dev2"},
			map[string]string{"/home/r2lib1": "/usr/r2lib1"},
			map[string]string{"r2devices": "dev1 dev2"}))
	testManager.podDevices.insert("pod1", "con2", resourceName1,
		constructDevices([]string{"dev3"}),
		constructAllocResp(map[string]string{"/dev/r1dev3": "/dev/r1dev3"},
			map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
	testManager.podDevices.insert("pod2", "con1", resourceName1,
		constructDevices([]string{"dev4"}),
		constructAllocResp(map[string]string{"/dev/r1dev4": "/dev/r1dev4"},
			map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))

	testManager.allDevices[resourceName1] = sets.NewString()
	testManager.allDevices[resourceName1].Insert("dev1")
	testManager.allDevices[resourceName1].Insert("dev2")
	testManager.allDevices[resourceName1].Insert("dev3")
	testManager.allDevices[resourceName1].Insert("dev4")
	testManager.allDevices[resourceName1].Insert("dev5")
	testManager.allDevices[resourceName2] = sets.NewString()
	testManager.allDevices[resourceName2].Insert("dev1")
	testManager.allDevices[resourceName2].Insert("dev2")

	expectedPodDevices := testManager.podDevices
	expectedAllocatedDevices := testManager.podDevices.devices()
	expectedAllDevices := testManager.allDevices

	err = testManager.writeCheckpoint()

	as.Nil(err)
	testManager.podDevices = make(podDevices)
	err = testManager.readCheckpoint()
	as.Nil(err)

	as.Equal(len(expectedPodDevices), len(testManager.podDevices))
||||
for podUID, containerDevices := range expectedPodDevices {
|
||||
for conName, resources := range containerDevices {
|
||||
for resource := range resources {
|
||||
as.True(reflect.DeepEqual(
|
||||
expectedPodDevices.containerDevices(podUID, conName, resource),
|
||||
testManager.podDevices.containerDevices(podUID, conName, resource)))
|
||||
opts1 := expectedPodDevices.deviceRunContainerOptions(podUID, conName)
|
||||
opts2 := testManager.podDevices.deviceRunContainerOptions(podUID, conName)
|
||||
as.Equal(len(opts1.Envs), len(opts2.Envs))
|
||||
as.Equal(len(opts1.Mounts), len(opts2.Mounts))
|
||||
as.Equal(len(opts1.Devices), len(opts2.Devices))
|
||||
}
|
||||
}
|
||||
}
|
||||
as.True(reflect.DeepEqual(expectedAllocatedDevices, testManager.allocatedDevices))
|
||||
as.True(reflect.DeepEqual(expectedAllDevices, testManager.allDevices))
|
||||
}
|
||||
|
||||
type activePodsStub struct {
|
||||
activePods []*v1.Pod
|
||||
}
|
||||
|
||||
func (a *activePodsStub) getActivePods() []*v1.Pod {
|
||||
return a.activePods
|
||||
}
|
||||
|
||||
func (a *activePodsStub) updateActivePods(newPods []*v1.Pod) {
|
||||
a.activePods = newPods
|
||||
}
|
||||
|
||||
type MockEndpoint struct {
|
||||
allocateFunc func(devs []string) (*pluginapi.AllocateResponse, error)
|
||||
}
|
||||
|
||||
func (m *MockEndpoint) stop() {}
|
||||
func (m *MockEndpoint) run() {}
|
||||
|
||||
func (m *MockEndpoint) getDevices() []pluginapi.Device {
|
||||
return []pluginapi.Device{}
|
||||
}
|
||||
|
||||
func (m *MockEndpoint) callback(resourceName string, added, updated, deleted []pluginapi.Device) {}
|
||||
|
||||
func (m *MockEndpoint) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
|
||||
if m.allocateFunc != nil {
|
||||
return m.allocateFunc(devs)
|
||||
}
|
||||
return nil, nil
|
||||
}
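
// Illustrative sketch (not part of the upstream test): a MockEndpoint stands in for
// a real device plugin endpoint, so a test can also exercise the error path of
// Allocate by wiring in an endpoint that refuses every allocation:
//
//	testManager.endpoints["domain1.com/resource1"] = &MockEndpoint{
//		allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
//			return nil, fmt.Errorf("allocation refused for %v", devs)
//		},
//	}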

func TestPodContainerDeviceAllocation(t *testing.T) {
	flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
	var logLevel string
	flag.StringVar(&logLevel, "logLevel", "4", "test")
	flag.Lookup("v").Value.Set(logLevel)

	resourceName1 := "domain1.com/resource1"
	resourceQuantity1 := *resource.NewQuantity(int64(2), resource.DecimalSI)
	devID1 := "dev1"
	devID2 := "dev2"
	resourceName2 := "domain2.com/resource2"
	resourceQuantity2 := *resource.NewQuantity(int64(1), resource.DecimalSI)
	devID3 := "dev3"
	devID4 := "dev4"

	as := require.New(t)
	monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
	podsStub := activePodsStub{
		activePods: []*v1.Pod{},
	}
	cachedNode := &v1.Node{
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{},
		},
	}
	nodeInfo := &schedulercache.NodeInfo{}
	nodeInfo.SetNode(cachedNode)

	tmpDir, err := ioutil.TempDir("", "checkpoint")
	as.Nil(err)
	defer os.RemoveAll(tmpDir)
	testManager := &ManagerImpl{
		socketdir:        tmpDir,
		callback:         monitorCallback,
		allDevices:       make(map[string]sets.String),
		allocatedDevices: make(map[string]sets.String),
		endpoints:        make(map[string]endpoint),
		podDevices:       make(podDevices),
		activePods:       podsStub.getActivePods,
		sourcesReady:     &sourcesReadyStub{},
	}

	testManager.allDevices[resourceName1] = sets.NewString()
	testManager.allDevices[resourceName1].Insert(devID1)
	testManager.allDevices[resourceName1].Insert(devID2)
	testManager.allDevices[resourceName2] = sets.NewString()
	testManager.allDevices[resourceName2].Insert(devID3)
	testManager.allDevices[resourceName2].Insert(devID4)

	testManager.endpoints[resourceName1] = &MockEndpoint{
		allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
			resp := new(pluginapi.AllocateResponse)
			resp.Envs = make(map[string]string)
			for _, dev := range devs {
				switch dev {
				case "dev1":
					resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
						ContainerPath: "/dev/aaa",
						HostPath:      "/dev/aaa",
						Permissions:   "mrw",
					})

					resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
						ContainerPath: "/dev/bbb",
						HostPath:      "/dev/bbb",
						Permissions:   "mrw",
					})

					resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
						ContainerPath: "/container_dir1/file1",
						HostPath:      "host_dir1/file1",
						ReadOnly:      true,
					})

				case "dev2":
					resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
						ContainerPath: "/dev/ccc",
						HostPath:      "/dev/ccc",
						Permissions:   "mrw",
					})

					resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
						ContainerPath: "/container_dir1/file2",
						HostPath:      "host_dir1/file2",
						ReadOnly:      true,
					})

					resp.Envs["key1"] = "val1"
				}
			}
			return resp, nil
		},
	}

	testManager.endpoints[resourceName2] = &MockEndpoint{
		allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
			resp := new(pluginapi.AllocateResponse)
			resp.Envs = make(map[string]string)
			for _, dev := range devs {
				switch dev {
				case "dev3":
					resp.Envs["key2"] = "val2"

				case "dev4":
					resp.Envs["key2"] = "val3"
				}
			}
			return resp, nil
		},
	}

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: uuid.NewUUID(),
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity1,
							v1.ResourceName("cpu"):         resourceQuantity1,
							v1.ResourceName(resourceName2): resourceQuantity2,
						},
					},
				},
			},
		},
	}

	podsStub.updateActivePods([]*v1.Pod{pod})
	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
	as.Nil(err)
	runContainerOpts := testManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
	as.NotNil(runContainerOpts)
	as.Equal(len(runContainerOpts.Devices), 3)
	as.Equal(len(runContainerOpts.Mounts), 2)
	as.Equal(len(runContainerOpts.Envs), 2)

	// Requesting to create a pod without enough resources should fail.
	as.Equal(2, testManager.allocatedDevices[resourceName1].Len())
	failPod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: uuid.NewUUID(),
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity2,
						},
					},
				},
			},
		},
	}
	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: failPod})
	as.NotNil(err)
	runContainerOpts2 := testManager.GetDeviceRunContainerOptions(failPod, &failPod.Spec.Containers[0])
	as.Nil(runContainerOpts2)

	// Requesting to create a new pod with a single resourceName2 should succeed.
	newPod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: uuid.NewUUID(),
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName2): resourceQuantity2,
						},
					},
				},
			},
		},
	}
	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: newPod})
	as.Nil(err)
	runContainerOpts3 := testManager.GetDeviceRunContainerOptions(newPod, &newPod.Spec.Containers[0])
	as.Equal(1, len(runContainerOpts3.Envs))

	// Requesting to create a pod that requests resourceName1 in init containers and normal containers
	// should succeed with devices allocated to init containers reallocated to normal containers.
	podWithPluginResourcesInInitContainers := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: uuid.NewUUID(),
		},
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity2,
						},
					},
				},
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity1,
						},
					},
				},
			},
			Containers: []v1.Container{
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity2,
							v1.ResourceName(resourceName2): resourceQuantity2,
						},
					},
				},
				{
					Name: string(uuid.NewUUID()),
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							v1.ResourceName(resourceName1): resourceQuantity2,
							v1.ResourceName(resourceName2): resourceQuantity2,
						},
					},
				},
			},
		},
	}
	podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInInitContainers})
	err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: podWithPluginResourcesInInitContainers})
	as.Nil(err)
	podUID := string(podWithPluginResourcesInInitContainers.UID)
	initCont1 := podWithPluginResourcesInInitContainers.Spec.InitContainers[0].Name
	initCont2 := podWithPluginResourcesInInitContainers.Spec.InitContainers[1].Name
	normalCont1 := podWithPluginResourcesInInitContainers.Spec.Containers[0].Name
	normalCont2 := podWithPluginResourcesInInitContainers.Spec.Containers[1].Name
	initCont1Devices := testManager.podDevices.containerDevices(podUID, initCont1, resourceName1)
	initCont2Devices := testManager.podDevices.containerDevices(podUID, initCont2, resourceName1)
	normalCont1Devices := testManager.podDevices.containerDevices(podUID, normalCont1, resourceName1)
	normalCont2Devices := testManager.podDevices.containerDevices(podUID, normalCont2, resourceName1)
	as.True(initCont2Devices.IsSuperset(initCont1Devices))
	as.True(initCont2Devices.IsSuperset(normalCont1Devices))
	as.True(initCont2Devices.IsSuperset(normalCont2Devices))
	as.Equal(0, normalCont1Devices.Intersection(normalCont2Devices).Len())
}
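
// Sketch of the expected reuse above (illustrative reasoning, not an upstream
// assertion): with only dev1 and dev2 registered for resource1, the second init
// container must receive both devices, and the two app containers then split the
// same pair, e.g. initCont2Devices = {dev1, dev2}, normalCont1Devices = {dev1},
// normalCont2Devices = {dev2} (or swapped). That is exactly what the superset and
// empty-intersection assertions verify.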

func TestSanitizeNodeAllocatable(t *testing.T) {
	resourceName1 := "domain1.com/resource1"
	devID1 := "dev1"

	resourceName2 := "domain2.com/resource2"
	devID2 := "dev2"

	as := assert.New(t)
	monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}

	testManager := &ManagerImpl{
		callback:         monitorCallback,
		allDevices:       make(map[string]sets.String),
		allocatedDevices: make(map[string]sets.String),
		podDevices:       make(podDevices),
	}
	// require one of resource1 and one of resource2
	testManager.allocatedDevices[resourceName1] = sets.NewString()
	testManager.allocatedDevices[resourceName1].Insert(devID1)
	testManager.allocatedDevices[resourceName2] = sets.NewString()
	testManager.allocatedDevices[resourceName2].Insert(devID2)

	cachedNode := &v1.Node{
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				// has no resource1 and two of resource2
				v1.ResourceName(resourceName2): *resource.NewQuantity(int64(2), resource.DecimalSI),
			},
		},
	}
	nodeInfo := &schedulercache.NodeInfo{}
	nodeInfo.SetNode(cachedNode)

	testManager.sanitizeNodeAllocatable(nodeInfo)

	allocatableScalarResources := nodeInfo.AllocatableResource().ScalarResources
	// allocatable in nodeInfo is less than needed, should update
	as.Equal(1, int(allocatableScalarResources[v1.ResourceName(resourceName1)]))
	// allocatable in nodeInfo is more than needed, should skip updating
	as.Equal(2, int(allocatableScalarResources[v1.ResourceName(resourceName2)]))
}
257
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/pod_devices.go
generated
vendored
Normal file
@ -0,0 +1,257 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"github.com/golang/glog"

	"k8s.io/apimachinery/pkg/util/sets"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

type deviceAllocateInfo struct {
	// deviceIds contains device Ids allocated to this container for the given resourceName.
	deviceIds sets.String
	// allocResp contains the cached rpc AllocateResponse.
	allocResp *pluginapi.AllocateResponse
}

type resourceAllocateInfo map[string]deviceAllocateInfo // Keyed by resourceName.
type containerDevices map[string]resourceAllocateInfo   // Keyed by containerName.
type podDevices map[string]containerDevices             // Keyed by podUID.

func (pdev podDevices) pods() sets.String {
	ret := sets.NewString()
	for k := range pdev {
		ret.Insert(k)
	}
	return ret
}

func (pdev podDevices) insert(podUID, contName, resource string, devices sets.String, resp *pluginapi.AllocateResponse) {
	if _, podExists := pdev[podUID]; !podExists {
		pdev[podUID] = make(containerDevices)
	}
	if _, contExists := pdev[podUID][contName]; !contExists {
		pdev[podUID][contName] = make(resourceAllocateInfo)
	}
	pdev[podUID][contName][resource] = deviceAllocateInfo{
		deviceIds: devices,
		allocResp: resp,
	}
}

func (pdev podDevices) delete(pods []string) {
	for _, uid := range pods {
		delete(pdev, uid)
	}
}

// Returns the list of device IDs allocated to the given container for the given resource.
// Returns nil if we don't have cached state for the given <podUID, contName, resource>.
func (pdev podDevices) containerDevices(podUID, contName, resource string) sets.String {
	if _, podExists := pdev[podUID]; !podExists {
		return nil
	}
	if _, contExists := pdev[podUID][contName]; !contExists {
		return nil
	}
	devs, resourceExists := pdev[podUID][contName][resource]
	if !resourceExists {
		return nil
	}
	return devs.deviceIds
}

// Populates allocatedResources with the device resources allocated to the specified <podUID, contName>.
func (pdev podDevices) addContainerAllocatedResources(podUID, contName string, allocatedResources map[string]sets.String) {
	containers, exists := pdev[podUID]
	if !exists {
		return
	}
	resources, exists := containers[contName]
	if !exists {
		return
	}
	for resource, devices := range resources {
		allocatedResources[resource] = allocatedResources[resource].Union(devices.deviceIds)
	}
}

// Removes the device resources allocated to the specified <podUID, contName> from allocatedResources.
func (pdev podDevices) removeContainerAllocatedResources(podUID, contName string, allocatedResources map[string]sets.String) {
	containers, exists := pdev[podUID]
	if !exists {
		return
	}
	resources, exists := containers[contName]
	if !exists {
		return
	}
	for resource, devices := range resources {
		allocatedResources[resource] = allocatedResources[resource].Difference(devices.deviceIds)
	}
}

// Returns all devices allocated to the pods being tracked, keyed by resourceName.
func (pdev podDevices) devices() map[string]sets.String {
	ret := make(map[string]sets.String)
	for _, containerDevices := range pdev {
		for _, resources := range containerDevices {
			for resource, devices := range resources {
				if _, exists := ret[resource]; !exists {
					ret[resource] = sets.NewString()
				}
				ret[resource] = ret[resource].Union(devices.deviceIds)
			}
		}
	}
	return ret
}

// podDevicesCheckpointEntry is used to record <pod, container> to device allocation information.
type podDevicesCheckpointEntry struct {
	PodUID        string
	ContainerName string
	ResourceName  string
	DeviceIDs     []string
	AllocResp     []byte
}

// Turns podDevices into checkpointData.
func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
	var data []podDevicesCheckpointEntry
	for podUID, containerDevices := range pdev {
		for conName, resources := range containerDevices {
			for resource, devices := range resources {
				devIds := devices.deviceIds.UnsortedList()
				if devices.allocResp == nil {
					glog.Errorf("Can't marshal allocResp for %v %v %v: allocation response is missing", podUID, conName, resource)
					continue
				}

				allocResp, err := devices.allocResp.Marshal()
				if err != nil {
					glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
					continue
				}
				data = append(data, podDevicesCheckpointEntry{podUID, conName, resource, devIds, allocResp})
			}
		}
	}
	return data
}

// Populates podDevices from the passed-in checkpointData.
func (pdev podDevices) fromCheckpointData(data []podDevicesCheckpointEntry) {
	for _, entry := range data {
		glog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
			entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)
		devIDs := sets.NewString()
		for _, devID := range entry.DeviceIDs {
			devIDs.Insert(devID)
		}
		allocResp := &pluginapi.AllocateResponse{}
		err := allocResp.Unmarshal(entry.AllocResp)
		if err != nil {
			glog.Errorf("Can't unmarshal allocResp for %v %v %v: %v", entry.PodUID, entry.ContainerName, entry.ResourceName, err)
			continue
		}
		pdev.insert(entry.PodUID, entry.ContainerName, entry.ResourceName, devIDs, allocResp)
	}
}
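
// Illustrative note (editorial sketch, not the manager's actual checkpoint I/O):
// because the checkpoint entries above are plain structs, a round trip through
// toCheckpointData and fromCheckpointData restores the same pod/container/resource
// mapping, provided every cached allocResp was non-nil and marshalled successfully:
//
//	restored := make(podDevices)
//	restored.fromCheckpointData(pdev.toCheckpointData())
//	// restored.devices() now reports the same device IDs per resource as pdev.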

// Returns combined container runtime settings to consume the container's allocated devices.
func (pdev podDevices) deviceRunContainerOptions(podUID, contName string) *DeviceRunContainerOptions {
	containers, exists := pdev[podUID]
	if !exists {
		return nil
	}
	resources, exists := containers[contName]
	if !exists {
		return nil
	}
	opts := &DeviceRunContainerOptions{}
	// Maps to detect duplicate settings.
	devsMap := make(map[string]string)
	mountsMap := make(map[string]string)
	envsMap := make(map[string]string)
	// Loops through AllocationResponses of all cached device resources.
	for _, devices := range resources {
		resp := devices.allocResp
		// Each Allocate response has the following artifacts:
		//   - environment variables
		//   - mount points
		//   - device files
		// These artifacts are per resource per container.
		// Updates RunContainerOptions.Envs.
		for k, v := range resp.Envs {
			if e, ok := envsMap[k]; ok {
				glog.V(3).Infof("skip existing env %s %s", k, v)
				if e != v {
					glog.Errorf("Environment variable %s has conflicting setting: %s and %s", k, e, v)
				}
				continue
			}
			glog.V(4).Infof("add env %s %s", k, v)
			envsMap[k] = v
			opts.Envs = append(opts.Envs, kubecontainer.EnvVar{Name: k, Value: v})
		}

		// Updates RunContainerOptions.Devices.
		for _, dev := range resp.Devices {
			if d, ok := devsMap[dev.ContainerPath]; ok {
				glog.V(3).Infof("skip existing device %s %s", dev.ContainerPath, dev.HostPath)
				if d != dev.HostPath {
					glog.Errorf("Container device %s has conflicting mapping host devices: %s and %s",
						dev.ContainerPath, d, dev.HostPath)
				}
				continue
			}
			glog.V(4).Infof("add device %s %s", dev.ContainerPath, dev.HostPath)
			devsMap[dev.ContainerPath] = dev.HostPath
			opts.Devices = append(opts.Devices, kubecontainer.DeviceInfo{
				PathOnHost:      dev.HostPath,
				PathInContainer: dev.ContainerPath,
				Permissions:     dev.Permissions,
			})
		}
		// Updates RunContainerOptions.Mounts.
		for _, mount := range resp.Mounts {
			if m, ok := mountsMap[mount.ContainerPath]; ok {
				glog.V(3).Infof("skip existing mount %s %s", mount.ContainerPath, mount.HostPath)
				if m != mount.HostPath {
					glog.Errorf("Container mount %s has conflicting mapping host mounts: %s and %s",
						mount.ContainerPath, m, mount.HostPath)
				}
				continue
			}
			glog.V(4).Infof("add mount %s %s", mount.ContainerPath, mount.HostPath)
			mountsMap[mount.ContainerPath] = mount.HostPath
			opts.Mounts = append(opts.Mounts, kubecontainer.Mount{
				Name:          mount.ContainerPath,
				ContainerPath: mount.ContainerPath,
				HostPath:      mount.HostPath,
				ReadOnly:      mount.ReadOnly,
				// TODO: This may need to be part of Device plugin API.
				SELinuxRelabel: false,
			})
		}
	}
	return opts
}
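
// Sketch of how a caller might consume the merged options (runOpts, pod and
// container are illustrative names, not identifiers from this package):
//
//	if opts := pdev.deviceRunContainerOptions(string(pod.UID), container.Name); opts != nil {
//		runOpts.Envs = append(runOpts.Envs, opts.Envs...)
//		runOpts.Mounts = append(runOpts.Mounts, opts.Mounts...)
//		runOpts.Devices = append(runOpts.Devices, opts.Devices...)
//	}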
96
vendor/k8s.io/kubernetes/pkg/kubelet/cm/deviceplugin/types.go
generated
vendored
Normal file
@ -0,0 +1,96 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deviceplugin

import (
	"k8s.io/api/core/v1"
	pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1alpha"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// Manager manages all the Device Plugins running on a node.
type Manager interface {
	// Start starts the device plugin registration service.
	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error

	// Devices is the map of devices that have registered themselves
	// against the manager.
	// The map key is the ResourceName of the device plugins.
	Devices() map[string][]pluginapi.Device

	// Allocate configures and assigns devices to pods. The pods are provided
	// through the pod admission attributes in the attrs argument. From the
	// requested device resources, Allocate will communicate with the owning
	// device plugin to allow setup procedures to take place, and for the
	// device plugin to provide runtime settings to use the device (environment
	// variables, mount points and device files). The node object is provided
	// for the device manager to update the node capacity to reflect the
	// currently available devices.
	Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

	// Stop stops the manager.
	Stop() error

	// GetDeviceRunContainerOptions checks whether we have cached containerDevices
	// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
	// for the found one. An empty struct is returned in case no cached state is found.
	GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions

	// GetCapacity returns the amount of available device plugin resource capacity
	// and inactive device plugin resources previously registered on the node.
	GetCapacity() (v1.ResourceList, []string)
}
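
// Illustrative usage sketch (an assumption about the calling side, not code from
// this package): a kubelet-style caller would invoke Allocate during pod admission
// and later query GetDeviceRunContainerOptions when starting each container.
// deviceManager, nodeInfo and attrs are placeholder names.
//
//	if err := deviceManager.Allocate(nodeInfo, attrs); err != nil {
//		// reject the pod admission
//	}
//	opts := deviceManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])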

// DeviceRunContainerOptions contains the combined container runtime settings to consume its allocated devices.
type DeviceRunContainerOptions struct {
	// The environment variables list.
	Envs []kubecontainer.EnvVar
	// The mounts for the container.
	Mounts []kubecontainer.Mount
	// The host devices mapped into the container.
	Devices []kubecontainer.DeviceInfo
}

// TODO: evaluate whether we need these error definitions.
const (
	// errFailedToDialDevicePlugin is the error raised when the device plugin could not be
	// reached on the registered socket
	errFailedToDialDevicePlugin = "failed to dial device plugin:"
	// errUnsuportedVersion is the error raised when the device plugin uses an API version not
	// supported by the Kubelet registry
	errUnsuportedVersion = "requested API version %q is not supported by kubelet. Supported version is %q"
	// errDevicePluginAlreadyExists is the error raised when a device plugin with the
	// same Resource Name tries to register itself
	errDevicePluginAlreadyExists = "another device plugin already registered this Resource Name"
	// errInvalidResourceName is the error raised when a device plugin is registering
	// itself with an invalid ResourceName
	errInvalidResourceName = "the ResourceName %q is invalid"
	// errEmptyResourceName is the error raised when the resource name field is empty
	errEmptyResourceName = "invalid Empty ResourceName"

	// errBadSocket is the error raised when the registry socket path is not absolute
	errBadSocket = "bad socketPath, must be an absolute path:"
	// errRemoveSocket is the error raised when the registry could not remove the existing socket
	errRemoveSocket = "failed to remove socket while starting device plugin registry, with error"
	// errListenSocket is the error raised when the registry could not listen on the socket
	errListenSocket = "failed to listen to socket while starting device plugin registry, with error"
	// errListAndWatch is the error raised when ListAndWatch ended unsuccessfully
	errListAndWatch = "listAndWatch ended unexpectedly for device plugin %s with error %v"
)