vendor files

Serguei Bezverkhi
2018-01-09 13:57:14 -05:00
parent 558bc6c02a
commit 7b24313bd6
16547 changed files with 4527373 additions and 0 deletions

119
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/BUILD generated vendored Normal file

@@ -0,0 +1,119 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"fake_kuberuntime_manager.go",
"helpers.go",
"instrumented_services.go",
"kuberuntime_container.go",
"kuberuntime_gc.go",
"kuberuntime_image.go",
"kuberuntime_logs.go",
"kuberuntime_manager.go",
"kuberuntime_sandbox.go",
"labels.go",
"legacy.go",
"security_context.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/kuberuntime",
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/credentialprovider:go_default_library",
"//pkg/kubelet/apis/cri:go_default_library",
"//pkg/kubelet/apis/cri/v1alpha1/runtime:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/images:go_default_library",
"//pkg/kubelet/kuberuntime/logs:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/prober/results:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/cache:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/security/apparmor:go_default_library",
"//pkg/securitycontext:go_default_library",
"//pkg/util/parsers:go_default_library",
"//pkg/util/selinux:go_default_library",
"//pkg/util/tail:go_default_library",
"//pkg/util/version:go_default_library",
"//vendor/github.com/armon/circbuf:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/tools/reference:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"helpers_test.go",
"instrumented_services_test.go",
"kuberuntime_container_test.go",
"kuberuntime_gc_test.go",
"kuberuntime_image_test.go",
"kuberuntime_manager_test.go",
"kuberuntime_sandbox_test.go",
"labels_test.go",
"legacy_test.go",
"security_context_test.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/kuberuntime",
library = ":go_default_library",
deps = [
"//pkg/credentialprovider:go_default_library",
"//pkg/kubelet/apis/cri/testing:go_default_library",
"//pkg/kubelet/apis/cri/v1alpha1/runtime:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/container/testing:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//vendor/github.com/golang/mock/gomock:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/kubelet/kuberuntime/logs:all-srcs",
],
tags = ["automanaged"],
)
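# The targets above can be exercised directly with Bazel; a typical invocation
# (assuming a configured Bazel workspace for this repository) looks like:
#
#   bazel build //pkg/kubelet/kuberuntime:go_default_library
#   bazel test //pkg/kubelet/kuberuntime:go_default_test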

19
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/doc.go generated vendored Normal file

@@ -0,0 +1,19 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package kuberuntime contains an implementation of kubecontainer.Runtime using
// the interface in pkg/kubelet/v1.
package kuberuntime

110
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go generated vendored Normal file

@@ -0,0 +1,110 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"net/http"
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/kubernetes/pkg/credentialprovider"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
)
const (
fakeSeccompProfileRoot = "/fakeSeccompProfileRoot"
)
type fakeHTTP struct {
url string
err error
}
func (f *fakeHTTP) Get(url string) (*http.Response, error) {
f.url = url
return nil, f.err
}
type fakePodStateProvider struct {
existingPods map[types.UID]struct{}
runningPods map[types.UID]struct{}
}
func newFakePodStateProvider() *fakePodStateProvider {
return &fakePodStateProvider{
existingPods: make(map[types.UID]struct{}),
runningPods: make(map[types.UID]struct{}),
}
}
func (f *fakePodStateProvider) IsPodDeleted(uid types.UID) bool {
_, found := f.existingPods[uid]
return !found
}
func (f *fakePodStateProvider) IsPodTerminated(uid types.UID) bool {
_, found := f.runningPods[uid]
return !found
}
func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) {
recorder := &record.FakeRecorder{}
kubeRuntimeManager := &kubeGenericRuntimeManager{
recorder: recorder,
cpuCFSQuota: false,
livenessManager: proberesults.NewManager(),
containerRefManager: kubecontainer.NewRefManager(),
machineInfo: machineInfo,
osInterface: osInterface,
runtimeHelper: runtimeHelper,
runtimeService: runtimeService,
imageService: imageService,
keyring: keyring,
seccompProfileRoot: fakeSeccompProfileRoot,
internalLifecycle: cm.NewFakeInternalContainerLifecycle(),
}
typedVersion, err := runtimeService.Version(kubeRuntimeAPIVersion)
if err != nil {
return nil, err
}
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodStateProvider(), kubeRuntimeManager)
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
kubeRuntimeManager.imagePuller = images.NewImageManager(
kubecontainer.FilterEventRecorder(recorder),
kubeRuntimeManager,
flowcontrol.NewBackOff(time.Second, 300*time.Second),
false,
0, // Disable image pull throttling by setting QPS to 0.
0,
)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(
&fakeHTTP{},
kubeRuntimeManager,
kubeRuntimeManager)
return kubeRuntimeManager, nil
}
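// A minimal usage sketch for tests; the fake services and fixtures named here
// stand in for whatever the caller constructs (see the test files in this
// package for concrete setups):
//
//	m, err := NewFakeKubeRuntimeManager(fakeRuntimeService, fakeImageService,
//		machineInfo, osInterface, runtimeHelper, keyring)
//	if err != nil {
//		t.Fatal(err)
//	}
//	// m is a kubeGenericRuntimeManager backed entirely by fakes.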

280
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/helpers.go generated vendored Normal file

@@ -0,0 +1,280 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
const (
// Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc
minShares = 2
sharesPerCPU = 1024
milliCPUToCPU = 1000
// 100000 is equivalent to 100ms
quotaPeriod = 100 * minQuotaPeriod
minQuotaPeriod = 1000
)
type podsByID []*kubecontainer.Pod
func (b podsByID) Len() int { return len(b) }
func (b podsByID) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b podsByID) Less(i, j int) bool { return b[i].ID < b[j].ID }
type containersByID []*kubecontainer.Container
func (b containersByID) Len() int { return len(b) }
func (b containersByID) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b containersByID) Less(i, j int) bool { return b[i].ID.ID < b[j].ID.ID }
// Newest first.
type podSandboxByCreated []*runtimeapi.PodSandbox
func (p podSandboxByCreated) Len() int { return len(p) }
func (p podSandboxByCreated) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p podSandboxByCreated) Less(i, j int) bool { return p[i].CreatedAt > p[j].CreatedAt }
type containerStatusByCreated []*kubecontainer.ContainerStatus
func (c containerStatusByCreated) Len() int { return len(c) }
func (c containerStatusByCreated) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c containerStatusByCreated) Less(i, j int) bool { return c[i].CreatedAt.After(c[j].CreatedAt) }
// toKubeContainerState converts runtimeapi.ContainerState to kubecontainer.ContainerState.
func toKubeContainerState(state runtimeapi.ContainerState) kubecontainer.ContainerState {
switch state {
case runtimeapi.ContainerState_CONTAINER_CREATED:
return kubecontainer.ContainerStateCreated
case runtimeapi.ContainerState_CONTAINER_RUNNING:
return kubecontainer.ContainerStateRunning
case runtimeapi.ContainerState_CONTAINER_EXITED:
return kubecontainer.ContainerStateExited
case runtimeapi.ContainerState_CONTAINER_UNKNOWN:
return kubecontainer.ContainerStateUnknown
}
return kubecontainer.ContainerStateUnknown
}
// toRuntimeProtocol converts v1.Protocol to runtimeapi.Protocol.
func toRuntimeProtocol(protocol v1.Protocol) runtimeapi.Protocol {
switch protocol {
case v1.ProtocolTCP:
return runtimeapi.Protocol_TCP
case v1.ProtocolUDP:
return runtimeapi.Protocol_UDP
}
glog.Warningf("Unknown protocol %q: defaulting to TCP", protocol)
return runtimeapi.Protocol_TCP
}
// toKubeContainer converts runtimeapi.Container to kubecontainer.Container.
func (m *kubeGenericRuntimeManager) toKubeContainer(c *runtimeapi.Container) (*kubecontainer.Container, error) {
if c == nil || c.Id == "" || c.Image == nil {
return nil, fmt.Errorf("unable to convert a nil pointer to a runtime container")
}
annotatedInfo := getContainerInfoFromAnnotations(c.Annotations)
return &kubecontainer.Container{
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(),
ImageID: c.ImageRef,
Image: c.Image.Image,
Hash: annotatedInfo.Hash,
State: toKubeContainerState(c.State),
}, nil
}
// sandboxToKubeContainer converts runtimeapi.PodSandbox to kubecontainer.Container.
// This is only needed because we need to return sandboxes as if they were
// kubecontainer.Containers to avoid substantial changes to PLEG.
// TODO: Remove this once it becomes obsolete.
func (m *kubeGenericRuntimeManager) sandboxToKubeContainer(s *runtimeapi.PodSandbox) (*kubecontainer.Container, error) {
if s == nil || s.Id == "" {
return nil, fmt.Errorf("unable to convert a nil pointer to a runtime container")
}
return &kubecontainer.Container{
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: s.Id},
State: kubecontainer.SandboxToContainerState(s.State),
}, nil
}
// getImageUser gets the uid or user name that will run the command(s) from the image. The function
// guarantees that only one of them is set.
func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string, error) {
imageStatus, err := m.imageService.ImageStatus(&runtimeapi.ImageSpec{Image: image})
if err != nil {
return nil, "", err
}
if imageStatus != nil {
if imageStatus.Uid != nil {
return &imageStatus.GetUid().Value, "", nil
}
if imageStatus.Username != "" {
return nil, imageStatus.Username, nil
}
}
// If none of them is set, treat it as root.
return new(int64), "", nil
}
// isContainerFailed returns true if container has exited and exitcode is not zero.
func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
return true
}
return false
}
// milliCPUToShares converts milliCPU to CPU shares
func milliCPUToShares(milliCPU int64) int64 {
if milliCPU == 0 {
// Return 2 here to really match kernel default for zero milliCPU.
return minShares
}
// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
if shares < minShares {
return minShares
}
return shares
}
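// Worked example for milliCPUToShares, using the constants above:
// milliCPUToShares(250) = (250*1024)/1000 = 256 shares, while
// milliCPUToShares(1) = 1 and is clamped up to minShares (2), the kernel floor.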
// milliCPUToQuota converts milliCPU to CFS quota and period values
func milliCPUToQuota(milliCPU int64) (quota int64, period int64) {
// CFS quota is measured in two values:
// - cfs_period_us=100ms (the amount of time to measure usage across)
// - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
// so in the above example, you are limited to 20% of a single CPU
// for multi-cpu environments, you just scale equivalent amounts
if milliCPU == 0 {
return
}
// we set the period to 100ms by default
period = quotaPeriod
// we then convert your milliCPU to a value normalized over a period
quota = (milliCPU * quotaPeriod) / milliCPUToCPU
// quota needs to be a minimum of 1ms.
if quota < minQuotaPeriod {
quota = minQuotaPeriod
}
return
}
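// Worked example for milliCPUToQuota, using the constants above:
// milliCPUToQuota(200) returns period = 100000us (100ms) and
// quota = (200*100000)/1000 = 20000us (20ms), i.e. the 20%-of-one-CPU case
// described in the comment; any value below 10 milliCPU clamps to the 1ms
// minimum quota.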
// getStableKey generates a key (string) to uniquely identify a
// (pod, container) tuple. The key should include the content of the
// container, so that any change to the container generates a new key.
func getStableKey(pod *v1.Pod, container *v1.Container) string {
hash := strconv.FormatUint(kubecontainer.HashContainer(container), 16)
return fmt.Sprintf("%s_%s_%s_%s_%s", pod.Name, pod.Namespace, string(pod.UID), container.Name, hash)
}
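// For example, with the fixtures used in TestStableKey (helpers_test.go), the
// key has the form "test_pod_test_pod_namespace_test_pod_uid_test_container_<hash>",
// where <hash> is the hex-encoded container hash; changing the image changes <hash>.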
// buildContainerLogsPath builds log path for container relative to pod logs directory.
func buildContainerLogsPath(containerName string, restartCount int) string {
return fmt.Sprintf("%s_%d.log", containerName, restartCount)
}
// buildFullContainerLogsPath builds absolute log path for container.
func buildFullContainerLogsPath(podUID types.UID, containerName string, restartCount int) string {
return filepath.Join(buildPodLogsDirectory(podUID), buildContainerLogsPath(containerName, restartCount))
}
// buildPodLogsDirectory builds absolute log directory path for a pod sandbox.
func buildPodLogsDirectory(podUID types.UID) string {
return filepath.Join(podLogsRootDirectory, string(podUID))
}
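// Composed together, buildFullContainerLogsPath(uid, "nginx", 2) yields
// <podLogsRootDirectory>/<uid>/nginx_2.log; podLogsRootDirectory is a
// package-level constant defined elsewhere in this package ("nginx" is just
// an illustrative container name).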
// toKubeRuntimeStatus converts the runtimeapi.RuntimeStatus to kubecontainer.RuntimeStatus.
func toKubeRuntimeStatus(status *runtimeapi.RuntimeStatus) *kubecontainer.RuntimeStatus {
conditions := []kubecontainer.RuntimeCondition{}
for _, c := range status.GetConditions() {
conditions = append(conditions, kubecontainer.RuntimeCondition{
Type: kubecontainer.RuntimeConditionType(c.Type),
Status: c.Status,
Reason: c.Reason,
Message: c.Message,
})
}
return &kubecontainer.RuntimeStatus{Conditions: conditions}
}
// getSysctlsFromAnnotations gets sysctls and unsafeSysctls from annotations.
func getSysctlsFromAnnotations(annotations map[string]string) (map[string]string, error) {
apiSysctls, apiUnsafeSysctls, err := v1helper.SysctlsFromPodAnnotations(annotations)
if err != nil {
return nil, err
}
sysctls := make(map[string]string)
for _, c := range apiSysctls {
sysctls[c.Name] = c.Value
}
for _, c := range apiUnsafeSysctls {
sysctls[c.Name] = c.Value
}
return sysctls, nil
}
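// For example (mirroring TestGetSysctlsFromAnnotations in helpers_test.go),
// a sysctls annotation of "kernel.shmmni=32768,kernel.shmmax=1000000000"
// produces {"kernel.shmmni": "32768", "kernel.shmmax": "1000000000"}; safe
// and unsafe sysctls are merged into the same flat map.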
// getSeccompProfileFromAnnotations gets seccomp profile from annotations.
// It gets pod's profile if containerName is empty.
func (m *kubeGenericRuntimeManager) getSeccompProfileFromAnnotations(annotations map[string]string, containerName string) string {
// try the pod profile.
profile, profileOK := annotations[v1.SeccompPodAnnotationKey]
if containerName != "" {
// try the container profile.
cProfile, cProfileOK := annotations[v1.SeccompContainerAnnotationKeyPrefix+containerName]
if cProfileOK {
profile = cProfile
profileOK = cProfileOK
}
}
if !profileOK {
return ""
}
if strings.HasPrefix(profile, "localhost/") {
name := strings.TrimPrefix(profile, "localhost/")
fname := filepath.Join(m.seccompProfileRoot, filepath.FromSlash(name))
return "localhost/" + fname
}
return profile
}
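// For example (mirroring TestGetSeccompProfileFromAnnotations in
// helpers_test.go): "unconfined" and "docker/default" pass through unchanged;
// "localhost/chmod.json" resolves to "localhost/" plus
// filepath.Join(m.seccompProfileRoot, "chmod.json"); and a matching
// container-level annotation overrides the pod-level profile.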

307
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/helpers_test.go generated vendored Normal file

@@ -0,0 +1,307 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimetesting "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
func TestStableKey(t *testing.T) {
container := &v1.Container{
Name: "test_container",
Image: "foo/image:v1",
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test_pod_namespace",
UID: "test_pod_uid",
},
Spec: v1.PodSpec{
Containers: []v1.Container{*container},
},
}
oldKey := getStableKey(pod, container)
// Updating the container image should change the key.
container.Image = "foo/image:v2"
newKey := getStableKey(pod, container)
assert.NotEqual(t, oldKey, newKey)
}
// TestGetSysctlsFromAnnotations tests the logic of getting sysctls from annotations.
func TestGetSysctlsFromAnnotations(t *testing.T) {
tests := []struct {
annotations map[string]string
expectedSysctls map[string]string
}{{
annotations: map[string]string{
v1.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000",
v1.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000",
},
expectedSysctls: map[string]string{
"kernel.shmmni": "32768",
"kernel.shmmax": "1000000000",
"knet.ipv4.route.min_pmtu": "1000",
},
}, {
annotations: map[string]string{
v1.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000",
},
expectedSysctls: map[string]string{
"kernel.shmmni": "32768",
"kernel.shmmax": "1000000000",
},
}, {
annotations: map[string]string{
v1.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000",
},
expectedSysctls: map[string]string{
"knet.ipv4.route.min_pmtu": "1000",
},
}}
for i, test := range tests {
actualSysctls, err := getSysctlsFromAnnotations(test.annotations)
assert.NoError(t, err, "TestCase[%d]", i)
assert.Len(t, actualSysctls, len(test.expectedSysctls), "TestCase[%d]", i)
assert.Equal(t, test.expectedSysctls, actualSysctls, "TestCase[%d]", i)
}
}
func TestToKubeContainer(t *testing.T) {
c := &runtimeapi.Container{
Id: "test-id",
Metadata: &runtimeapi.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Image: &runtimeapi.ImageSpec{Image: "test-image"},
ImageRef: "test-image-ref",
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
Annotations: map[string]string{
containerHashLabel: "1234",
},
}
expect := &kubecontainer.Container{
ID: kubecontainer.ContainerID{
Type: runtimetesting.FakeRuntimeName,
ID: "test-id",
},
Name: "test-name",
ImageID: "test-image-ref",
Image: "test-image",
Hash: uint64(0x1234),
State: kubecontainer.ContainerStateRunning,
}
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
got, err := m.toKubeContainer(c)
assert.NoError(t, err)
assert.Equal(t, expect, got)
}
func TestGetImageUser(t *testing.T) {
_, i, m, err := createTestRuntimeManager()
assert.NoError(t, err)
type image struct {
name string
uid *runtimeapi.Int64Value
username string
}
type imageUserValues struct {
// getImageUser can return (*int64)(nil), so comparing with *uid would break;
// the type cannot be *int64 because Go does not allow taking the address of a numeric constant.
uid interface{}
username string
err error
}
tests := []struct {
description string
originalImage image
expectedImageUserValues imageUserValues
}{
{
"image without username and uid should return (new(int64), \"\", nil)",
image{
name: "test-image-ref1",
uid: (*runtimeapi.Int64Value)(nil),
username: "",
},
imageUserValues{
uid: int64(0),
username: "",
err: nil,
},
},
{
"image with username and no uid should return ((*int64)nil, imageStatus.Username, nil)",
image{
name: "test-image-ref2",
uid: (*runtimeapi.Int64Value)(nil),
username: "testUser",
},
imageUserValues{
uid: (*int64)(nil),
username: "testUser",
err: nil,
},
},
{
"image with uid should return (*int64, \"\", nil)",
image{
name: "test-image-ref3",
uid: &runtimeapi.Int64Value{
Value: 2,
},
username: "whatever",
},
imageUserValues{
uid: int64(2),
username: "",
err: nil,
},
},
}
i.SetFakeImages([]string{"test-image-ref1", "test-image-ref2", "test-image-ref3"})
for j, test := range tests {
i.Images[test.originalImage.name].Username = test.originalImage.username
i.Images[test.originalImage.name].Uid = test.originalImage.uid
uid, username, err := m.getImageUser(test.originalImage.name)
assert.NoError(t, err, "TestCase[%d]", j)
if test.expectedImageUserValues.uid == (*int64)(nil) {
assert.Equal(t, test.expectedImageUserValues.uid, uid, "TestCase[%d]", j)
} else {
assert.Equal(t, test.expectedImageUserValues.uid, *uid, "TestCase[%d]", j)
}
assert.Equal(t, test.expectedImageUserValues.username, username, "TestCase[%d]", j)
}
}
func TestGetSeccompProfileFromAnnotations(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
require.NoError(t, err)
tests := []struct {
description string
annotation map[string]string
containerName string
expectedProfile string
}{
{
description: "no seccomp should return empty string",
expectedProfile: "",
},
{
description: "no seccomp with containerName should return empty string",
containerName: "container1",
expectedProfile: "",
},
{
description: "pod docker/default seccomp profile should return docker/default",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "docker/default",
},
expectedProfile: "docker/default",
},
{
description: "pod docker/default seccomp profile with containerName should return docker/default",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "docker/default",
},
containerName: "container1",
expectedProfile: "docker/default",
},
{
description: "pod unconfined seccomp profile should return unconfined",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "unconfined",
},
expectedProfile: "unconfined",
},
{
description: "pod unconfined seccomp profile with containerName should return unconfined",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "unconfined",
},
containerName: "container1",
expectedProfile: "unconfined",
},
{
description: "pod localhost seccomp profile should return local profile path",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "localhost/chmod.json",
},
expectedProfile: "localhost/" + filepath.Join(fakeSeccompProfileRoot, "chmod.json"),
},
{
description: "pod localhost seccomp profile with containerName should return local profile path",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "localhost/chmod.json",
},
containerName: "container1",
expectedProfile: "localhost/" + filepath.Join(fakeSeccompProfileRoot, "chmod.json"),
},
{
description: "container localhost seccomp profile with containerName should return local profile path",
annotation: map[string]string{
v1.SeccompContainerAnnotationKeyPrefix + "container1": "localhost/chmod.json",
},
containerName: "container1",
expectedProfile: "localhost/" + filepath.Join(fakeSeccompProfileRoot, "chmod.json"),
},
{
description: "container localhost seccomp profile should override pod profile",
annotation: map[string]string{
v1.SeccompPodAnnotationKey: "unconfined",
v1.SeccompContainerAnnotationKeyPrefix + "container1": "localhost/chmod.json",
},
containerName: "container1",
expectedProfile: "localhost/" + filepath.Join(fakeSeccompProfileRoot, "chmod.json"),
},
{
description: "container localhost seccomp profile with unmatched containerName should return empty string",
annotation: map[string]string{
v1.SeccompContainerAnnotationKeyPrefix + "container1": "localhost/chmod.json",
},
containerName: "container2",
expectedProfile: "",
},
}
for i, test := range tests {
seccompProfile := m.getSeccompProfileFromAnnotations(test.annotation, test.containerName)
assert.Equal(t, test.expectedProfile, seccompProfile, "TestCase[%d]", i)
}
}

294
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/instrumented_services.go generated vendored Normal file

@@ -0,0 +1,294 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"time"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
// instrumentedRuntimeService wraps the RuntimeService and records the operations
// and errors metrics.
type instrumentedRuntimeService struct {
service internalapi.RuntimeService
}
// Creates an instrumented RuntimeService from an existing RuntimeService.
func newInstrumentedRuntimeService(service internalapi.RuntimeService) internalapi.RuntimeService {
return &instrumentedRuntimeService{service: service}
}
// instrumentedImageManagerService wraps the ImageManagerService and records the operations
// and errors metrics.
type instrumentedImageManagerService struct {
service internalapi.ImageManagerService
}
// Creates an instrumented ImageManagerService from an existing ImageManagerService.
func newInstrumentedImageManagerService(service internalapi.ImageManagerService) internalapi.ImageManagerService {
return &instrumentedImageManagerService{service: service}
}
// recordOperation records the duration of the operation.
func recordOperation(operation string, start time.Time) {
metrics.RuntimeOperations.WithLabelValues(operation).Inc()
metrics.RuntimeOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start))
}
// recordError records error for metric if an error occurred.
func recordError(operation string, err error) {
if err != nil {
metrics.RuntimeOperationsErrors.WithLabelValues(operation).Inc()
}
}
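// Every wrapped method below follows the same instrumentation pattern; as a
// sketch (operation names vary per method):
//
//	const operation = "some_operation"
//	defer recordOperation(operation, time.Now())
//	out, err := in.service.SomeCall(args)
//	recordError(operation, err)
//	return out, err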
func (in instrumentedRuntimeService) Version(apiVersion string) (*runtimeapi.VersionResponse, error) {
const operation = "version"
defer recordOperation(operation, time.Now())
out, err := in.service.Version(apiVersion)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
const operation = "status"
defer recordOperation(operation, time.Now())
out, err := in.service.Status()
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) CreateContainer(podSandboxID string, config *runtimeapi.ContainerConfig, sandboxConfig *runtimeapi.PodSandboxConfig) (string, error) {
const operation = "create_container"
defer recordOperation(operation, time.Now())
out, err := in.service.CreateContainer(podSandboxID, config, sandboxConfig)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) StartContainer(containerID string) error {
const operation = "start_container"
defer recordOperation(operation, time.Now())
err := in.service.StartContainer(containerID)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) StopContainer(containerID string, timeout int64) error {
const operation = "stop_container"
defer recordOperation(operation, time.Now())
err := in.service.StopContainer(containerID, timeout)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) RemoveContainer(containerID string) error {
const operation = "remove_container"
defer recordOperation(operation, time.Now())
err := in.service.RemoveContainer(containerID)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) ListContainers(filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) {
const operation = "list_containers"
defer recordOperation(operation, time.Now())
out, err := in.service.ListContainers(filter)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) ContainerStatus(containerID string) (*runtimeapi.ContainerStatus, error) {
const operation = "container_status"
defer recordOperation(operation, time.Now())
out, err := in.service.ContainerStatus(containerID)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources) error {
const operation = "update_container_resources"
defer recordOperation(operation, time.Now())
err := in.service.UpdateContainerResources(containerID, resources)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) ExecSync(containerID string, cmd []string, timeout time.Duration) ([]byte, []byte, error) {
const operation = "exec_sync"
defer recordOperation(operation, time.Now())
stdout, stderr, err := in.service.ExecSync(containerID, cmd, timeout)
recordError(operation, err)
return stdout, stderr, err
}
func (in instrumentedRuntimeService) Exec(req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) {
const operation = "exec"
defer recordOperation(operation, time.Now())
resp, err := in.service.Exec(req)
recordError(operation, err)
return resp, err
}
func (in instrumentedRuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) {
const operation = "attach"
defer recordOperation(operation, time.Now())
resp, err := in.service.Attach(req)
recordError(operation, err)
return resp, err
}
func (in instrumentedRuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (string, error) {
const operation = "run_podsandbox"
defer recordOperation(operation, time.Now())
out, err := in.service.RunPodSandbox(config)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) StopPodSandbox(podSandboxID string) error {
const operation = "stop_podsandbox"
defer recordOperation(operation, time.Now())
err := in.service.StopPodSandbox(podSandboxID)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) RemovePodSandbox(podSandboxID string) error {
const operation = "remove_podsandbox"
defer recordOperation(operation, time.Now())
err := in.service.RemovePodSandbox(podSandboxID)
recordError(operation, err)
return err
}
func (in instrumentedRuntimeService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodSandboxStatus, error) {
const operation = "podsandbox_status"
defer recordOperation(operation, time.Now())
out, err := in.service.PodSandboxStatus(podSandboxID)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]*runtimeapi.PodSandbox, error) {
const operation = "list_podsandbox"
defer recordOperation(operation, time.Now())
out, err := in.service.ListPodSandbox(filter)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) ContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
const operation = "container_stats"
defer recordOperation(operation, time.Now())
out, err := in.service.ContainerStats(containerID)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) ListContainerStats(filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error) {
const operation = "list_container_stats"
defer recordOperation(operation, time.Now())
out, err := in.service.ListContainerStats(filter)
recordError(operation, err)
return out, err
}
func (in instrumentedRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
const operation = "port_forward"
defer recordOperation(operation, time.Now())
resp, err := in.service.PortForward(req)
recordError(operation, err)
return resp, err
}
func (in instrumentedRuntimeService) UpdateRuntimeConfig(runtimeConfig *runtimeapi.RuntimeConfig) error {
const operation = "update_runtime_config"
defer recordOperation(operation, time.Now())
err := in.service.UpdateRuntimeConfig(runtimeConfig)
recordError(operation, err)
return err
}
func (in instrumentedImageManagerService) ListImages(filter *runtimeapi.ImageFilter) ([]*runtimeapi.Image, error) {
const operation = "list_images"
defer recordOperation(operation, time.Now())
out, err := in.service.ListImages(filter)
recordError(operation, err)
return out, err
}
func (in instrumentedImageManagerService) ImageStatus(image *runtimeapi.ImageSpec) (*runtimeapi.Image, error) {
const operation = "image_status"
defer recordOperation(operation, time.Now())
out, err := in.service.ImageStatus(image)
recordError(operation, err)
return out, err
}
func (in instrumentedImageManagerService) PullImage(image *runtimeapi.ImageSpec, auth *runtimeapi.AuthConfig) (string, error) {
const operation = "pull_image"
defer recordOperation(operation, time.Now())
imageRef, err := in.service.PullImage(image, auth)
recordError(operation, err)
return imageRef, err
}
func (in instrumentedImageManagerService) RemoveImage(image *runtimeapi.ImageSpec) error {
const operation = "remove_image"
defer recordOperation(operation, time.Now())
err := in.service.RemoveImage(image)
recordError(operation, err)
return err
}
func (in instrumentedImageManagerService) ImageFsInfo() ([]*runtimeapi.FilesystemUsage, error) {
const operation = "image_fs_info"
defer recordOperation(operation, time.Now())
fsInfo, err := in.service.ImageFsInfo()
recordError(operation, err)
return fsInfo, err
}
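// A minimal wiring sketch; the concrete services named here are placeholders
// for whatever CRI clients the kubelet constructs:
//
//	rs := newInstrumentedRuntimeService(realRuntimeService)
//	is := newInstrumentedImageManagerService(realImageService)
//	// Every call through rs and is now records operation counts, latency, and errors.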

91
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/instrumented_services_test.go generated vendored Normal file

@@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"net"
"net/http"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
func TestRecordOperation(t *testing.T) {
prometheus.MustRegister(metrics.RuntimeOperations)
prometheus.MustRegister(metrics.RuntimeOperationsLatency)
prometheus.MustRegister(metrics.RuntimeOperationsErrors)
temporalServer := "127.0.0.1:1234"
l, err := net.Listen("tcp", temporalServer)
assert.NoError(t, err)
defer l.Close()
prometheusUrl := "http://" + temporalServer + "/metrics"
mux := http.NewServeMux()
mux.Handle("/metrics", prometheus.Handler())
server := &http.Server{
Addr: temporalServer,
Handler: mux,
}
go func() {
server.Serve(l)
}()
recordOperation("create_container", time.Now())
runtimeOperationsCounterExpected := "kubelet_runtime_operations{operation_type=\"create_container\"} 1"
runtimeOperationsLatencyExpected := "kubelet_runtime_operations_latency_microseconds_count{operation_type=\"create_container\"} 1"
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
mux.ServeHTTP(w, r)
}), "GET", prometheusUrl, nil, runtimeOperationsCounterExpected)
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
mux.ServeHTTP(w, r)
}), "GET", prometheusUrl, nil, runtimeOperationsLatencyExpected)
}
func TestInstrumentedVersion(t *testing.T) {
fakeRuntime, _, _, _ := createTestRuntimeManager()
irs := newInstrumentedRuntimeService(fakeRuntime)
vr, err := irs.Version("1")
assert.NoError(t, err)
assert.Equal(t, kubeRuntimeAPIVersion, vr.Version)
}
func TestStatus(t *testing.T) {
fakeRuntime, _, _, _ := createTestRuntimeManager()
fakeRuntime.FakeStatus = &runtimeapi.RuntimeStatus{
Conditions: []*runtimeapi.RuntimeCondition{
{Type: runtimeapi.RuntimeReady, Status: false},
{Type: runtimeapi.NetworkReady, Status: true},
},
}
irs := newInstrumentedRuntimeService(fakeRuntime)
actual, err := irs.Status()
assert.NoError(t, err)
expected := &runtimeapi.RuntimeStatus{
Conditions: []*runtimeapi.RuntimeCondition{
{Type: runtimeapi.RuntimeReady, Status: false},
{Type: runtimeapi.NetworkReady, Status: true},
},
}
assert.Equal(t, expected, actual)
}

862
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go generated vendored Normal file

@@ -0,0 +1,862 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"errors"
"fmt"
"io"
"math/rand"
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"google.golang.org/grpc"
"github.com/armon/circbuf"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util/selinux"
"k8s.io/kubernetes/pkg/util/tail"
)
var (
ErrCreateContainerConfig = errors.New("CreateContainerConfigError")
ErrCreateContainer = errors.New("CreateContainerError")
ErrPostStartHook = errors.New("PostStartHookError")
)
// recordContainerEvent should be used by the runtime manager for all container related events.
// it has sanity checks to ensure that we do not write events that can abuse our masters.
// in particular, it ensures that a containerID never appears in an event message as that
// is prone to causing a lot of distinct events that do not count well.
// it replaces any reference to a containerID with the containerName which is stable, and is what users know.
func (m *kubeGenericRuntimeManager) recordContainerEvent(pod *v1.Pod, container *v1.Container, containerID, eventType, reason, message string, args ...interface{}) {
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
return
}
eventMessage := message
if len(args) > 0 {
eventMessage = fmt.Sprintf(message, args...)
}
// this is a hack, but often the error from the runtime includes the containerID
// which kills our ability to deduplicate events. this protection makes a huge
// difference in the number of unique events
if containerID != "" {
eventMessage = strings.Replace(eventMessage, containerID, container.Name, -1)
}
m.recorder.Event(ref, eventType, reason, eventMessage)
}
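// For example, if the runtime reports "container abc123 failed to start" for
// containerID "abc123", the emitted event message becomes "container
// <container name> failed to start", so repeated failures collapse into a
// single deduplicated event.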
// startContainer starts a container and, on error, returns a message describing why it failed.
// It starts the container through the following steps:
// * pull the image
// * create the container
// * start the container
// * run the post start lifecycle hooks (if applicable)
func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, container *v1.Container, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string) (string, error) {
// Step 1: pull the image.
imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
if err != nil {
m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
return msg, err
}
// Step 2: create the container.
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
}
glog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)
// For a new container, the RestartCount should be 0
restartCount := 0
containerStatus := podStatus.FindContainerStatusByName(container.Name)
if containerStatus != nil {
restartCount = containerStatus.RestartCount + 1
}
containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
if err != nil {
m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
return grpc.ErrorDesc(err), ErrCreateContainerConfig
}
containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
if err != nil {
m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
return grpc.ErrorDesc(err), ErrCreateContainer
}
err = m.internalLifecycle.PreStartContainer(pod, container, containerID)
if err != nil {
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", err)
return "Internal PreStartContainer hook failed", err
}
m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, "Created container")
if ref != nil {
m.containerRefManager.SetRef(kubecontainer.ContainerID{
Type: m.runtimeName,
ID: containerID,
}, ref)
}
// Step 3: start the container.
err = m.runtimeService.StartContainer(containerID)
if err != nil {
m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
return grpc.ErrorDesc(err), kubecontainer.ErrRunContainer
}
m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")
// Symlink container logs to the legacy container log location for cluster logging
// support.
// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
containerMeta := containerConfig.GetMetadata()
sandboxMeta := podSandboxConfig.GetMetadata()
legacySymlink := legacyLogSymlink(containerID, containerMeta.Name, sandboxMeta.Name,
sandboxMeta.Namespace)
containerLog := filepath.Join(podSandboxConfig.LogDirectory, containerConfig.LogPath)
if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
glog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v",
legacySymlink, containerID, containerLog, err)
}
// Step 4: execute the post start hook.
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
kubeContainerID := kubecontainer.ContainerID{
Type: m.runtimeName,
ID: containerID,
}
msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
if err := m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil); err != nil {
glog.Errorf("Failed to kill container %q(id=%q) in pod %q: %v, %v",
container.Name, kubeContainerID.String(), format.Pod(pod), ErrPostStartHook, err)
}
return msg, ErrPostStartHook
}
}
return "", nil
}
// generateContainerConfig generates container config for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, error) {
opts, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
if err != nil {
return nil, err
}
uid, username, err := m.getImageUser(container.Image)
if err != nil {
return nil, err
}
// Verify RunAsNonRoot. Non-root verification only supports numeric user.
if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
return nil, err
}
command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
restartCountUint32 := uint32(restartCount)
config := &runtimeapi.ContainerConfig{
Metadata: &runtimeapi.ContainerMetadata{
Name: container.Name,
Attempt: restartCountUint32,
},
Image: &runtimeapi.ImageSpec{Image: imageRef},
Command: command,
Args: args,
WorkingDir: container.WorkingDir,
Labels: newContainerLabels(container, pod),
Annotations: newContainerAnnotations(container, pod, restartCount),
Devices: makeDevices(opts),
Mounts: m.makeMounts(opts, container),
LogPath: containerLogsPath,
Stdin: container.Stdin,
StdinOnce: container.StdinOnce,
Tty: container.TTY,
Linux: m.generateLinuxContainerConfig(container, pod, uid, username),
}
// set environment variables
envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
for idx := range opts.Envs {
e := opts.Envs[idx]
envs[idx] = &runtimeapi.KeyValue{
Key: e.Name,
Value: e.Value,
}
}
config.Envs = envs
return config, nil
}
// generateLinuxContainerConfig generates linux container config for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.LinuxContainerConfig {
lc := &runtimeapi.LinuxContainerConfig{
Resources: &runtimeapi.LinuxContainerResources{},
SecurityContext: m.determineEffectiveSecurityContext(pod, container, uid, username),
}
// set linux container resources
var cpuShares int64
cpuRequest := container.Resources.Requests.Cpu()
cpuLimit := container.Resources.Limits.Cpu()
memoryLimit := container.Resources.Limits.Memory().Value()
oomScoreAdj := int64(qos.GetContainerOOMScoreAdjust(pod, container,
int64(m.machineInfo.MemoryCapacity)))
// If request is not specified, but limit is, we want request to default to limit.
// API server does this for new containers, but we repeat this logic in Kubelet
// for containers running on existing Kubernetes clusters.
if cpuRequest.IsZero() && !cpuLimit.IsZero() {
cpuShares = milliCPUToShares(cpuLimit.MilliValue())
} else {
// if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number
// of CPU shares.
cpuShares = milliCPUToShares(cpuRequest.MilliValue())
}
lc.Resources.CpuShares = cpuShares
if memoryLimit != 0 {
lc.Resources.MemoryLimitInBytes = memoryLimit
}
// Set OOM score of the container based on qos policy. Processes in lower-priority pods should
// be killed first if the system runs out of memory.
lc.Resources.OomScoreAdj = oomScoreAdj
if m.cpuCFSQuota {
// if cpuLimit.Amount is nil, then the appropriate default value is returned
// to allow full usage of cpu resource.
cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
lc.Resources.CpuQuota = cpuQuota
lc.Resources.CpuPeriod = cpuPeriod
}
return lc
}
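// Worked example: a container with no CPU request and a 500m CPU limit gets
// CpuShares = milliCPUToShares(500) = 512; with cpuCFSQuota enabled, the same
// limit yields CpuQuota = 50000us over CpuPeriod = 100000us, i.e. half a CPU.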
// makeDevices generates container devices for kubelet runtime v1.
func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
devices := make([]*runtimeapi.Device, len(opts.Devices))
for idx := range opts.Devices {
device := opts.Devices[idx]
devices[idx] = &runtimeapi.Device{
HostPath: device.PathOnHost,
ContainerPath: device.PathInContainer,
Permissions: device.Permissions,
}
}
return devices
}
// makeMounts generates container volume mounts for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *v1.Container) []*runtimeapi.Mount {
volumeMounts := []*runtimeapi.Mount{}
for idx := range opts.Mounts {
v := opts.Mounts[idx]
selinuxRelabel := v.SELinuxRelabel && selinux.SELinuxEnabled()
mount := &runtimeapi.Mount{
HostPath: v.HostPath,
ContainerPath: v.ContainerPath,
Readonly: v.ReadOnly,
SelinuxRelabel: selinuxRelabel,
Propagation: v.Propagation,
}
volumeMounts = append(volumeMounts, mount)
}
// The reason we create and mount the log file in here (not in kubelet) is because
// the file's location depends on the ID of the container, and we need to create and
// mount the file before actually starting the container.
if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 {
// Because the PodContainerDir contains pod uid and container name which is unique enough,
// here we just add a random id to make the path unique for different instances
// of the same container.
cid := makeUID()
containerLogPath := filepath.Join(opts.PodContainerDir, cid)
fs, err := m.osInterface.Create(containerLogPath)
if err != nil {
utilruntime.HandleError(fmt.Errorf("error on creating termination-log file %q: %v", containerLogPath, err))
} else {
fs.Close()
// Chmod is needed because ioutil.WriteFile() ends up calling
// open(2) to create the file, so the final mode used is "mode &
// ~umask". But we want to make sure the specified mode is used
// in the file no matter what the umask is.
if err := m.osInterface.Chmod(containerLogPath, 0666); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to set termination-log file permissions %q: %v", containerLogPath, err))
}
selinuxRelabel := selinux.SELinuxEnabled()
volumeMounts = append(volumeMounts, &runtimeapi.Mount{
HostPath: containerLogPath,
ContainerPath: container.TerminationMessagePath,
SelinuxRelabel: selinuxRelabel,
})
}
}
return volumeMounts
}
// getKubeletContainers lists containers managed by kubelet.
// The boolean parameter specifies whether to return all containers, including
// those already exited and dead (used for garbage collection).
func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([]*runtimeapi.Container, error) {
filter := &runtimeapi.ContainerFilter{}
if !allContainers {
filter.State = &runtimeapi.ContainerStateValue{
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
}
}
containers, err := m.runtimeService.ListContainers(filter)
if err != nil {
glog.Errorf("getKubeletContainers failed: %v", err)
return nil, err
}
return containers, nil
}
// makeUID returns a randomly generated string.
func makeUID() string {
return fmt.Sprintf("%08x", rand.Uint32())
}
// getTerminationMessage looks on the filesystem for the provided termination message path and returns a limited
// amount of those bytes, or true if the logs should be checked instead.
func getTerminationMessage(status *runtimeapi.ContainerStatus, terminationMessagePath string, fallbackToLogs bool) (string, bool) {
if len(terminationMessagePath) == 0 {
return "", fallbackToLogs
}
for _, mount := range status.Mounts {
if mount.ContainerPath != terminationMessagePath {
continue
}
path := mount.HostPath
data, _, err := tail.ReadAtMost(path, kubecontainer.MaxContainerTerminationMessageLength)
if err != nil {
if os.IsNotExist(err) {
return "", fallbackToLogs
}
return fmt.Sprintf("Error on reading termination log %s: %v", path, err), false
}
return string(data), (fallbackToLogs && len(data) == 0)
}
return "", fallbackToLogs
}
// readLastStringFromContainerLogs attempts to read up to the max log length from the end of the CRI log represented
// by path. It reads up to max log lines.
func (m *kubeGenericRuntimeManager) readLastStringFromContainerLogs(path string) string {
value := int64(kubecontainer.MaxContainerTerminationMessageLogLines)
buf, _ := circbuf.NewBuffer(kubecontainer.MaxContainerTerminationMessageLogLength)
if err := m.ReadLogs(path, "", &v1.PodLogOptions{TailLines: &value}, buf, buf); err != nil {
return fmt.Sprintf("Error on reading termination message from logs: %v", err)
}
return buf.String()
}
// getPodContainerStatuses gets all containers' statuses for the pod.
func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, name, namespace string) ([]*kubecontainer.ContainerStatus, error) {
// Select all containers of the given pod.
containers, err := m.runtimeService.ListContainers(&runtimeapi.ContainerFilter{
LabelSelector: map[string]string{types.KubernetesPodUIDLabel: string(uid)},
})
if err != nil {
glog.Errorf("ListContainers error: %v", err)
return nil, err
}
statuses := make([]*kubecontainer.ContainerStatus, len(containers))
// TODO: optimization: set maximum number of containers per container name to examine.
for i, c := range containers {
status, err := m.runtimeService.ContainerStatus(c.Id)
if err != nil {
glog.Errorf("ContainerStatus for %s error: %v", c.Id, err)
return nil, err
}
cStatus := toKubeContainerStatus(status, m.runtimeName)
if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
// Populate the termination message if needed.
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
labeledInfo := getContainerInfoFromLabels(status.Labels)
fallbackToLogs := annotatedInfo.TerminationMessagePolicy == v1.TerminationMessageFallbackToLogsOnError && cStatus.ExitCode != 0
tMessage, checkLogs := getTerminationMessage(status, annotatedInfo.TerminationMessagePath, fallbackToLogs)
if checkLogs {
// if legacyLogProvider is populated, we're supposed to use it to fetch logs
if m.legacyLogProvider != nil {
tMessage, err = m.legacyLogProvider.GetContainerLogTail(uid, name, namespace, kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id})
if err != nil {
tMessage = fmt.Sprintf("Error reading termination message from logs: %v", err)
}
} else {
path := buildFullContainerLogsPath(uid, labeledInfo.ContainerName, annotatedInfo.RestartCount)
tMessage = m.readLastStringFromContainerLogs(path)
}
}
// Use the termination message written by the application if it is not empty
if len(tMessage) != 0 {
cStatus.Message = tMessage
}
}
statuses[i] = cStatus
}
sort.Sort(containerStatusByCreated(statuses))
return statuses, nil
}
func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.ContainerStatus {
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
labeledInfo := getContainerInfoFromLabels(status.Labels)
cStatus := &kubecontainer.ContainerStatus{
ID: kubecontainer.ContainerID{
Type: runtimeName,
ID: status.Id,
},
Name: labeledInfo.ContainerName,
Image: status.Image.Image,
ImageID: status.ImageRef,
Hash: annotatedInfo.Hash,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
}
if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {
// If container is not in the created state, we have tried and
// started the container. Set the StartedAt time.
cStatus.StartedAt = time.Unix(0, status.StartedAt)
}
if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
cStatus.Reason = status.Reason
cStatus.Message = status.Message
cStatus.ExitCode = int(status.ExitCode)
cStatus.FinishedAt = time.Unix(0, status.FinishedAt)
}
return cStatus
}
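// Illustrative sketch (added example, not part of the original source): all
// CRI timestamps above are Unix nanoseconds, so every conversion follows the
// same pattern. criTimeToGoTime is a hypothetical helper that makes the
// convention explicit.
func criTimeToGoTime(unixNano int64) time.Time {
	// Zero whole seconds plus unixNano nanoseconds since the Unix epoch.
	return time.Unix(0, unixNano)
}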
// executePreStopHook runs the pre-stop lifecycle hooks if applicable and returns the duration it takes.
func (m *kubeGenericRuntimeManager) executePreStopHook(pod *v1.Pod, containerID kubecontainer.ContainerID, containerSpec *v1.Container, gracePeriod int64) int64 {
glog.V(3).Infof("Running preStop hook for container %q", containerID.String())
start := metav1.Now()
done := make(chan struct{})
go func() {
defer close(done)
defer utilruntime.HandleCrash()
if msg, err := m.runner.Run(containerID, pod, containerSpec, containerSpec.Lifecycle.PreStop); err != nil {
glog.Errorf("preStop hook for container %q failed: %v", containerSpec.Name, err)
m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeWarning, events.FailedPreStopHook, msg)
}
}()
select {
case <-time.After(time.Duration(gracePeriod) * time.Second):
glog.V(2).Infof("preStop hook for container %q did not complete in %d seconds", containerID, gracePeriod)
case <-done:
glog.V(3).Infof("preStop hook for container %q completed", containerID)
}
return int64(metav1.Now().Sub(start.Time).Seconds())
}
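// Illustrative sketch (added example, not part of the original source): the
// hook above is an instance of the common bounded-wait pattern. runWithDeadline
// is a hypothetical generalization; note that on timeout the goroutine keeps
// running, only the wait stops, exactly as in executePreStopHook.
func runWithDeadline(deadline time.Duration, fn func()) (finished bool) {
	done := make(chan struct{})
	go func() {
		defer close(done)
		fn()
	}()
	select {
	case <-time.After(deadline):
		return false // timed out; fn may still be running in the background
	case <-done:
		return true
	}
}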
// restoreSpecsFromContainerLabels restores all information needed for killing a container. In some
// cases we may not have the pod and container spec when killing a container, e.g. the pod is
// deleted during a kubelet restart.
// To solve this problem, we've already written necessary information into container labels. Here we
// just need to retrieve them from container labels and restore the specs.
// TODO(random-liu): Add a node e2e test to test this behaviour.
// TODO(random-liu): Change the lifecycle handler to just accept information needed, so that we can
// just pass the needed function not create the fake object.
func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID kubecontainer.ContainerID) (*v1.Pod, *v1.Container, error) {
var pod *v1.Pod
var container *v1.Container
s, err := m.runtimeService.ContainerStatus(containerID.ID)
if err != nil {
return nil, nil, err
}
l := getContainerInfoFromLabels(s.Labels)
a := getContainerInfoFromAnnotations(s.Annotations)
// Notice that the following are not full specs. The container-killing code should not use
// un-restored fields.
pod = &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: l.PodUID,
Name: l.PodName,
Namespace: l.PodNamespace,
DeletionGracePeriodSeconds: a.PodDeletionGracePeriod,
},
Spec: v1.PodSpec{
TerminationGracePeriodSeconds: a.PodTerminationGracePeriod,
},
}
container = &v1.Container{
Name: l.ContainerName,
Ports: a.ContainerPorts,
TerminationMessagePath: a.TerminationMessagePath,
}
if a.PreStopHandler != nil {
container.Lifecycle = &v1.Lifecycle{
PreStop: a.PreStopHandler,
}
}
return pod, container, nil
}
// killContainer kills a container through the following steps:
// * Run the pre-stop lifecycle hooks (if applicable).
// * Stop the container.
func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, reason string, gracePeriodOverride *int64) error {
var containerSpec *v1.Container
if pod != nil {
if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
return fmt.Errorf("failed to get containerSpec %q(id=%q) in pod %q when killing container for reason %q",
containerName, containerID.String(), format.Pod(pod), reason)
}
} else {
// Restore necessary information if one of the specs is nil.
restoredPod, restoredContainer, err := m.restoreSpecsFromContainerLabels(containerID)
if err != nil {
return err
}
pod, containerSpec = restoredPod, restoredContainer
}
// From this point on, pod and containerSpec must be non-nil.
gracePeriod := int64(minimumGracePeriodInSeconds)
switch {
case pod.DeletionGracePeriodSeconds != nil:
gracePeriod = *pod.DeletionGracePeriodSeconds
case pod.Spec.TerminationGracePeriodSeconds != nil:
gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
}
glog.V(2).Infof("Killing container %q with %d second grace period", containerID.String(), gracePeriod)
// Run internal pre-stop lifecycle hook
if err := m.internalLifecycle.PreStopContainer(containerID.ID); err != nil {
return err
}
// Run the pre-stop lifecycle hooks, if applicable, and if there is enough time to run them
if containerSpec.Lifecycle != nil && containerSpec.Lifecycle.PreStop != nil && gracePeriod > 0 {
gracePeriod = gracePeriod - m.executePreStopHook(pod, containerID, containerSpec, gracePeriod)
}
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
if gracePeriodOverride != nil {
gracePeriod = *gracePeriodOverride
glog.V(3).Infof("Killing container %q, but using %d second grace period override", containerID, gracePeriod)
}
err := m.runtimeService.StopContainer(containerID.ID, gracePeriod)
if err != nil {
glog.Errorf("Container %q termination failed with gracePeriod %d: %v", containerID.String(), gracePeriod, err)
} else {
glog.V(3).Infof("Container %q exited normally", containerID.String())
}
message := fmt.Sprintf("Killing container with id %s", containerID.String())
if reason != "" {
message = fmt.Sprint(message, ":", reason)
}
m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
m.containerRefManager.ClearRef(containerID)
return err
}
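// Illustrative sketch (added example, not part of the original source): the
// grace period resolution in killContainer can be summarized as a pure
// function. resolveGracePeriod is hypothetical; preStopSeconds is zero when no
// pre-stop hook ran. Precedence: the override wins outright; otherwise the pod
// deletion grace period, then the pod spec grace period, with the
// minimumGracePeriodInSeconds floor applied after deducting hook time.
func resolveGracePeriod(deletion, spec, override *int64, preStopSeconds int64) int64 {
	gracePeriod := int64(minimumGracePeriodInSeconds)
	switch {
	case deletion != nil:
		gracePeriod = *deletion
	case spec != nil:
		gracePeriod = *spec
	}
	gracePeriod -= preStopSeconds
	if gracePeriod < minimumGracePeriodInSeconds {
		gracePeriod = minimumGracePeriodInSeconds
	}
	if override != nil {
		gracePeriod = *override
	}
	return gracePeriod
}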
// killContainersWithSyncResult kills all of the pod's containers and returns the sync results.
func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (syncResults []*kubecontainer.SyncResult) {
containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
wg := sync.WaitGroup{}
wg.Add(len(runningPod.Containers))
for _, container := range runningPod.Containers {
go func(container *kubecontainer.Container) {
defer utilruntime.HandleCrash()
defer wg.Done()
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
if err := m.killContainer(pod, container.ID, container.Name, "Need to kill Pod", gracePeriodOverride); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
}
containerResults <- killContainerResult
}(container)
}
wg.Wait()
close(containerResults)
for containerResult := range containerResults {
syncResults = append(syncResults, containerResult)
}
return
}
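// Illustrative sketch (added example, not part of the original source): the
// function above is a fan-out/fan-in. forEachConcurrently is a hypothetical
// reduction of the same shape: one goroutine per item, results gathered over a
// channel buffered to len(items) so no sender can block after wg.Wait returns.
func forEachConcurrently(items []string, work func(string) error) []error {
	results := make(chan error, len(items))
	wg := sync.WaitGroup{}
	wg.Add(len(items))
	for _, item := range items {
		go func(item string) {
			defer wg.Done()
			results <- work(item)
		}(item)
	}
	wg.Wait()
	close(results)
	var errs []error
	for err := range results {
		if err != nil {
			errs = append(errs, err)
		}
	}
	return errs
}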
// pruneInitContainersBeforeStart ensures that before we begin creating init
// containers, we have reduced the number of outstanding init containers still
// present. This reduces load on the container garbage collector by only
// preserving the most recent terminated init container.
func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
// only the last execution of each init container should be preserved, and only preserve it if it is in the
// list of init containers to keep.
initContainerNames := sets.NewString()
for _, container := range pod.Spec.InitContainers {
initContainerNames.Insert(container.Name)
}
for name := range initContainerNames {
count := 0
for _, status := range podStatus.ContainerStatuses {
if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited {
continue
}
count++
// keep the first init container for this name
if count == 1 {
continue
}
// prune all other init containers that match this container name
glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
if err := m.removeContainer(status.ID.ID); err != nil {
utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
continue
}
// remove any references to this container
if _, ok := m.containerRefManager.GetRef(status.ID); ok {
m.containerRefManager.ClearRef(status.ID)
} else {
glog.Warningf("No ref for container %q", status.ID)
}
}
}
}
// purgeInitContainers removes all init containers. Note that this function does not check the
// state of the containers because it assumes all init containers have been stopped before the
// call happens.
func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
initContainerNames := sets.NewString()
for _, container := range pod.Spec.InitContainers {
initContainerNames.Insert(container.Name)
}
for name := range initContainerNames {
count := 0
for _, status := range podStatus.ContainerStatuses {
if status.Name != name || !initContainerNames.Has(status.Name) {
continue
}
count++
// Purge all init containers that match this container name
glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
if err := m.removeContainer(status.ID.ID); err != nil {
utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
continue
}
// Remove any references to this container
if _, ok := m.containerRefManager.GetRef(status.ID); ok {
m.containerRefManager.ClearRef(status.ID)
} else {
glog.Warningf("No ref for container %q", status.ID)
}
}
}
}
// findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers.
// Status is only returned if an init container has failed, in which case next will
// point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
if len(pod.Spec.InitContainers) == 0 {
return nil, nil, true
}
// If there are failed containers, return the status of the last failed one.
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
container := &pod.Spec.InitContainers[i]
status := podStatus.FindContainerStatusByName(container.Name)
if status != nil && isContainerFailed(status) {
return status, container, false
}
}
// There are no failed containers now.
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
container := &pod.Spec.InitContainers[i]
status := podStatus.FindContainerStatusByName(container.Name)
if status == nil {
continue
}
// container is still running, return not done.
if status.State == kubecontainer.ContainerStateRunning {
return nil, nil, false
}
if status.State == kubecontainer.ContainerStateExited {
// all init containers successful
if i == (len(pod.Spec.InitContainers) - 1) {
return nil, nil, true
}
// all containers up to i successful, go to i+1
return nil, &pod.Spec.InitContainers[i+1], false
}
}
return nil, &pod.Spec.InitContainers[0], false
}
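// Illustrative walk-through (added, not part of the original source): given
// hypothetical init containers [a, b, c], the scans above resolve as follows:
//   - b has failed                -> status(b), next = b, done = false
//   - a exited, b and c unstarted -> next = b, done = false
//   - c (the last one) exited     -> next = nil, done = true
//   - a is still running          -> next = nil, done = false
//   - no statuses at all          -> next = a (the first), done = false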
// GetContainerLogs returns logs of a specific container.
func (m *kubeGenericRuntimeManager) GetContainerLogs(pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) (err error) {
status, err := m.runtimeService.ContainerStatus(containerID.ID)
if err != nil {
return fmt.Errorf("failed to get container status %q: %v", containerID, err)
}
labeledInfo := getContainerInfoFromLabels(status.Labels)
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
path := buildFullContainerLogsPath(pod.UID, labeledInfo.ContainerName, annotatedInfo.RestartCount)
return m.ReadLogs(path, containerID.ID, logOptions, stdout, stderr)
}
// GetExec gets the endpoint the runtime will serve the exec request from.
func (m *kubeGenericRuntimeManager) GetExec(id kubecontainer.ContainerID, cmd []string, stdin, stdout, stderr, tty bool) (*url.URL, error) {
req := &runtimeapi.ExecRequest{
ContainerId: id.ID,
Cmd: cmd,
Tty: tty,
Stdin: stdin,
Stdout: stdout,
Stderr: stderr,
}
resp, err := m.runtimeService.Exec(req)
if err != nil {
return nil, err
}
return url.Parse(resp.Url)
}
// GetAttach gets the endpoint the runtime will serve the attach request from.
func (m *kubeGenericRuntimeManager) GetAttach(id kubecontainer.ContainerID, stdin, stdout, stderr, tty bool) (*url.URL, error) {
req := &runtimeapi.AttachRequest{
ContainerId: id.ID,
Stdin: stdin,
Stdout: stdout,
Stderr: stderr,
Tty: tty,
}
resp, err := m.runtimeService.Attach(req)
if err != nil {
return nil, err
}
return url.Parse(resp.Url)
}
// RunInContainer synchronously executes the command in the container, and returns the output.
func (m *kubeGenericRuntimeManager) RunInContainer(id kubecontainer.ContainerID, cmd []string, timeout time.Duration) ([]byte, error) {
stdout, stderr, err := m.runtimeService.ExecSync(id.ID, cmd, timeout)
// NOTE(tallclair): This does not correctly interleave stdout & stderr, but should be sufficient
// for logging purposes. A combined output option will need to be added to the ExecSyncRequest
// if more precise output ordering is ever required.
return append(stdout, stderr...), err
}
// removeContainer removes the container and the container logs.
// Notice that we remove the container logs first, so that the container will not be removed if
// the container logs fail to be removed, and the kubelet will retry this later. This guarantees
// that the container logs are removed together with the container.
// Notice that we assume that the container should only be removed in non-running state, and
// it will not write container logs anymore in that state.
func (m *kubeGenericRuntimeManager) removeContainer(containerID string) error {
glog.V(4).Infof("Removing container %q", containerID)
// Call internal container post-stop lifecycle hook.
if err := m.internalLifecycle.PostStopContainer(containerID); err != nil {
return err
}
// Remove the container log.
// TODO: Separate log and container lifecycle management.
if err := m.removeContainerLog(containerID); err != nil {
return err
}
// Remove the container.
return m.runtimeService.RemoveContainer(containerID)
}
// removeContainerLog removes the container log.
func (m *kubeGenericRuntimeManager) removeContainerLog(containerID string) error {
// Remove the container log.
status, err := m.runtimeService.ContainerStatus(containerID)
if err != nil {
return fmt.Errorf("failed to get container status %q: %v", containerID, err)
}
labeledInfo := getContainerInfoFromLabels(status.Labels)
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
path := buildFullContainerLogsPath(labeledInfo.PodUID, labeledInfo.ContainerName, annotatedInfo.RestartCount)
if err := m.osInterface.Remove(path); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove container %q log %q: %v", containerID, path, err)
}
// Remove the legacy container log symlink.
// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
legacySymlink := legacyLogSymlink(containerID, labeledInfo.ContainerName, labeledInfo.PodName,
labeledInfo.PodNamespace)
if err := m.osInterface.Remove(legacySymlink); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove container %q log legacy symbolic link %q: %v",
containerID, legacySymlink, err)
}
return nil
}
// DeleteContainer removes a container.
func (m *kubeGenericRuntimeManager) DeleteContainer(containerID kubecontainer.ContainerID) error {
return m.removeContainer(containerID.ID)
}


@ -0,0 +1,436 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/api/core/v1"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
// TestRemoveContainer tests removing the container and its corresponding container logs.
func TestRemoveContainer(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
},
},
}
// Create fake sandbox and container
_, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod)
assert.Equal(t, len(fakeContainers), 1)
containerId := fakeContainers[0].Id
fakeOS := m.osInterface.(*containertest.FakeOS)
err = m.removeContainer(containerId)
assert.NoError(t, err)
// Verify container log is removed
expectedContainerLogPath := filepath.Join(podLogsRootDirectory, "12345678", "foo_0.log")
expectedContainerLogSymlink := legacyLogSymlink(containerId, "foo", "bar", "new")
assert.Equal(t, fakeOS.Removes, []string{expectedContainerLogPath, expectedContainerLogSymlink})
// Verify container is removed
assert.Contains(t, fakeRuntime.Called, "RemoveContainer")
containers, err := fakeRuntime.ListContainers(&runtimeapi.ContainerFilter{Id: containerId})
assert.NoError(t, err)
assert.Empty(t, containers)
}
// TestKillContainer tests killing the container in a Pod.
func TestKillContainer(t *testing.T) {
_, _, m, _ := createTestRuntimeManager()
tests := []struct {
caseName string
pod *v1.Pod
containerID kubecontainer.ContainerID
containerName string
reason string
gracePeriodOverride int64
succeed bool
}{
{
caseName: "Failed to find container in pods, expect to return error",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{UID: "pod1_id", Name: "pod1", Namespace: "default"},
Spec: v1.PodSpec{Containers: []v1.Container{{Name: "empty_container"}}},
},
containerID: kubecontainer.ContainerID{Type: "docker", ID: "not_exist_container_id"},
containerName: "not_exist_container",
reason: "unknown reason",
gracePeriodOverride: 0,
succeed: false,
},
}
for _, test := range tests {
err := m.killContainer(test.pod, test.containerID, test.containerName, test.reason, &test.gracePeriodOverride)
if test.succeed != (err == nil) {
t.Errorf("%s: expected %v, got %v (%v)", test.caseName, test.succeed, (err == nil), err)
}
}
}
// TestToKubeContainerStatus tests the converting the CRI container status to
// the internal type (i.e., toKubeContainerStatus()) for containers in
// different states.
func TestToKubeContainerStatus(t *testing.T) {
cid := &kubecontainer.ContainerID{Type: "testRuntime", ID: "dummyid"}
meta := &runtimeapi.ContainerMetadata{Name: "cname", Attempt: 3}
imageSpec := &runtimeapi.ImageSpec{Image: "fimage"}
var (
createdAt int64 = 327
startedAt int64 = 999
finishedAt int64 = 1278
)
for desc, test := range map[string]struct {
input *runtimeapi.ContainerStatus
expected *kubecontainer.ContainerStatus
}{
"created container": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_CREATED,
CreatedAt: createdAt,
},
expected: &kubecontainer.ContainerStatus{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateCreated,
CreatedAt: time.Unix(0, createdAt),
},
},
"running container": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
},
expected: &kubecontainer.ContainerStatus{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
},
},
"exited container": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_EXITED,
CreatedAt: createdAt,
StartedAt: startedAt,
FinishedAt: finishedAt,
ExitCode: int32(121),
Reason: "GotKilled",
Message: "The container was killed",
},
expected: &kubecontainer.ContainerStatus{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateExited,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
FinishedAt: time.Unix(0, finishedAt),
ExitCode: 121,
Reason: "GotKilled",
Message: "The container was killed",
},
},
"unknown container": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_UNKNOWN,
CreatedAt: createdAt,
StartedAt: startedAt,
},
expected: &kubecontainer.ContainerStatus{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateUnknown,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
},
},
} {
actual := toKubeContainerStatus(test.input, cid.Type)
assert.Equal(t, test.expected, actual, desc)
}
}
func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int) *runtimeapi.ContainerConfig {
container := &pod.Spec.Containers[containerIndex]
podIP := ""
restartCount := 0
opts, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
restartCountUint32 := uint32(restartCount)
envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
expectedConfig := &runtimeapi.ContainerConfig{
Metadata: &runtimeapi.ContainerMetadata{
Name: container.Name,
Attempt: restartCountUint32,
},
Image: &runtimeapi.ImageSpec{Image: container.Image},
Command: container.Command,
Args: []string(nil),
WorkingDir: container.WorkingDir,
Labels: newContainerLabels(container, pod),
Annotations: newContainerAnnotations(container, pod, restartCount),
Devices: makeDevices(opts),
Mounts: m.makeMounts(opts, container),
LogPath: containerLogsPath,
Stdin: container.Stdin,
StdinOnce: container.StdinOnce,
Tty: container.TTY,
Linux: m.generateLinuxContainerConfig(container, pod, new(int64), ""),
Envs: envs,
}
return expectedConfig
}
func TestGenerateContainerConfig(t *testing.T) {
_, imageService, m, err := createTestRuntimeManager()
assert.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
Command: []string{"testCommand"},
WorkingDir: "testWorkingDir",
},
},
},
}
expectedConfig := makeExpectedConfig(m, pod, 0)
containerConfig, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image)
assert.NoError(t, err)
assert.Equal(t, expectedConfig, containerConfig, "generate container config for kubelet runtime v1.")
runAsUser := int64(0)
runAsNonRootTrue := true
podWithContainerSecurityContext := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
Command: []string{"testCommand"},
WorkingDir: "testWorkingDir",
SecurityContext: &v1.SecurityContext{
RunAsNonRoot: &runAsNonRootTrue,
RunAsUser: &runAsUser,
},
},
},
},
}
_, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
assert.Error(t, err)
imageId, _ := imageService.PullImage(&runtimeapi.ImageSpec{Image: "busybox"}, nil)
image, _ := imageService.ImageStatus(&runtimeapi.ImageSpec{Image: imageId})
image.Uid = nil
image.Username = "test"
podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsUser = nil
podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsNonRoot = &runAsNonRootTrue
_, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
assert.Error(t, err, "RunAsNonRoot should fail for non-numeric username")
}
func TestLifeCycleHook(t *testing.T) {
// Setup
fakeRuntime, _, m, _ := createTestRuntimeManager()
gracePeriod := int64(30)
cID := kubecontainer.ContainerID{
Type: "docker",
ID: "foo",
}
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "bar",
Namespace: "default",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
Command: []string{"testCommand"},
WorkingDir: "testWorkingDir",
},
},
},
}
cmdPostStart := &v1.Lifecycle{
PostStart: &v1.Handler{
Exec: &v1.ExecAction{
Command: []string{"PostStartCMD"},
},
},
}
httpLifeCycle := &v1.Lifecycle{
PreStop: &v1.Handler{
HTTPGet: &v1.HTTPGetAction{
Host: "testHost.com",
Path: "/GracefulExit",
},
},
}
cmdLifeCycle := &v1.Lifecycle{
PreStop: &v1.Handler{
Exec: &v1.ExecAction{
Command: []string{"PreStopCMD"},
},
},
}
fakeRunner := &containertest.FakeContainerCommandRunner{}
fakeHttp := &fakeHTTP{}
lcHandler := lifecycle.NewHandlerRunner(
fakeHttp,
fakeRunner,
nil)
m.runner = lcHandler
// Configured and works as expected
t.Run("PreStop-CMDExec", func(t *testing.T) {
testPod.Spec.Containers[0].Lifecycle = cmdLifeCycle
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriod)
if fakeRunner.Cmd[0] != cmdLifeCycle.PreStop.Exec.Command[0] {
t.Errorf("CMD Prestop hook was not invoked")
}
})
// Configured and working HTTP hook
t.Run("PreStop-HTTPGet", func(t *testing.T) {
defer func() { fakeHttp.url = "" }()
testPod.Spec.Containers[0].Lifecycle = httpLifeCycle
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriod)
if !strings.Contains(fakeHttp.url, httpLifeCycle.PreStop.HTTPGet.Host) {
t.Errorf("HTTP Prestop hook was not invoked")
}
})
// When there is no time to run the PreStop hook
t.Run("PreStop-NoTimeToRun", func(t *testing.T) {
gracePeriodLocal := int64(0)
testPod.DeletionGracePeriodSeconds = &gracePeriodLocal
testPod.Spec.TerminationGracePeriodSeconds = &gracePeriodLocal
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriodLocal)
if strings.Contains(fakeHttp.url, httpLifeCycle.PreStop.HTTPGet.Host) {
t.Errorf("HTTP Should not execute when gracePeriod is 0")
}
})
// Post Start script
t.Run("PostStart-CmdExe", func(t *testing.T) {
// Fake all the things you need before trying to create a container
fakeSandBox, _ := makeAndSetFakePod(t, m, fakeRuntime, testPod)
fakeSandBoxConfig, _ := m.generatePodSandboxConfig(testPod, 0)
testPod.Spec.Containers[0].Lifecycle = cmdPostStart
testContainer := &testPod.Spec.Containers[0]
fakePodStatus := &kubecontainer.PodStatus{
ContainerStatuses: []*kubecontainer.ContainerStatus{
{
ID: kubecontainer.ContainerID{
Type: "docker",
ID: testContainer.Name,
},
Name: testContainer.Name,
State: kubecontainer.ContainerStateCreated,
CreatedAt: time.Unix(0, time.Now().Unix()),
},
},
}
// Now try to create a container, which should in turn invoke PostStart Hook
_, err := m.startContainer(fakeSandBox.Id, fakeSandBoxConfig, testContainer, testPod, fakePodStatus, nil, "")
if err != nil {
t.Errorf("startContainer erro =%v", err)
}
if fakeRunner.Cmd[0] != cmdPostStart.PostStart.Exec.Command[0] {
t.Errorf("CMD PostStart hook was not invoked")
}
})
}


@ -0,0 +1,373 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"os"
"path/filepath"
"sort"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// containerGC is the manager of garbage collection.
type containerGC struct {
client internalapi.RuntimeService
manager *kubeGenericRuntimeManager
podStateProvider podStateProvider
}
// NewContainerGC creates a new containerGC.
func NewContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
return &containerGC{
client: client,
manager: manager,
podStateProvider: podStateProvider,
}
}
// containerGCInfo is the internal information kept for containers being considered for GC.
type containerGCInfo struct {
// The ID of the container.
id string
// The name of the container.
name string
// Creation time for the container.
createTime time.Time
}
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
type sandboxGCInfo struct {
// The ID of the sandbox.
id string
// Creation time for the sandbox.
createTime time.Time
// If true, the sandbox is ready or still has containers.
active bool
}
// evictUnit identifies a unit considered for eviction: a (pod UID, container name) pair.
type evictUnit struct {
// UID of the pod.
uid types.UID
// Name of the container in the pod.
name string
}
type containersByEvictUnit map[evictUnit][]containerGCInfo
type sandboxesByPodUID map[types.UID][]sandboxGCInfo
// NumContainers returns the number of containers in this map.
func (cu containersByEvictUnit) NumContainers() int {
num := 0
for key := range cu {
num += len(cu[key])
}
return num
}
// NumEvictUnits returns the number of evict units in this map.
func (cu containersByEvictUnit) NumEvictUnits() int {
return len(cu)
}
// Newest first.
type byCreated []containerGCInfo
func (a byCreated) Len() int { return len(a) }
func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
// Newest first.
type sandboxByCreated []sandboxGCInfo
func (a sandboxByCreated) Len() int { return len(a) }
func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
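// Illustrative usage (added, not part of the original source): both orderings
// sort newest-first, so after sorting, index 0 holds the most recently created
// entry and removeOldestN can trim the oldest entries from the tail:
//
//	sort.Sort(byCreated(containers))       // containers[0] is the newest
//	sort.Sort(sandboxByCreated(sandboxes)) // sandboxes[0] is the newest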
// enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, maxContainers int) {
for key := range evictUnits {
toRemove := len(evictUnits[key]) - maxContainers
if toRemove > 0 {
evictUnits[key] = cgc.removeOldestN(evictUnits[key], toRemove)
}
}
}
// removeOldestN removes the oldest toRemove containers and returns the resulting slice.
func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
// Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove
for i := len(containers) - 1; i >= numToKeep; i-- {
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
glog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
}
}
// Assume we removed the containers so that we're not too aggressive.
return containers[:numToKeep]
}
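// Illustrative sketch (added example, not part of the original source):
// because the input is sorted newest-first, keeping the first numToKeep
// entries and dropping the tail removes exactly the oldest containers.
// keepNewestN is a hypothetical, side-effect-free version of the index math.
func keepNewestN(containers []containerGCInfo, numToKeep int) (kept, removed []containerGCInfo) {
	if numToKeep < 0 {
		numToKeep = 0
	}
	if numToKeep >= len(containers) {
		return containers, nil
	}
	return containers[:numToKeep], containers[numToKeep:]
}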
// removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and
// returns the resulting slice.
func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
// Remove from oldest to newest (last to first).
numToKeep := len(sandboxes) - toRemove
for i := len(sandboxes) - 1; i >= numToKeep; i-- {
if !sandboxes[i].active {
cgc.removeSandbox(sandboxes[i].id)
}
}
}
// removeSandbox removes the sandbox by sandboxID.
func (cgc *containerGC) removeSandbox(sandboxID string) {
glog.V(4).Infof("Removing sandbox %q", sandboxID)
// In normal cases, kubelet should've already called StopPodSandbox before
// GC kicks in. To guard against the rare cases where this is not true, try
// stopping the sandbox before removing it.
if err := cgc.client.StopPodSandbox(sandboxID); err != nil {
glog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err)
return
}
if err := cgc.client.RemovePodSandbox(sandboxID); err != nil {
glog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err)
}
}
// evictableContainers gets all containers that are evictable. Evictable containers are those that
// are not running and were created more than minAge ago.
func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return containersByEvictUnit{}, err
}
evictUnits := make(containersByEvictUnit)
newestGCTime := time.Now().Add(-minAge)
for _, container := range containers {
// Prune out running containers.
if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
continue
}
createdAt := time.Unix(0, container.CreatedAt)
if newestGCTime.Before(createdAt) {
continue
}
labeledInfo := getContainerInfoFromLabels(container.Labels)
containerInfo := containerGCInfo{
id: container.Id,
name: container.Metadata.Name,
createTime: createdAt,
}
key := evictUnit{
uid: labeledInfo.PodUID,
name: containerInfo.name,
}
evictUnits[key] = append(evictUnits[key], containerInfo)
}
// Sort the containers by age.
for uid := range evictUnits {
sort.Sort(byCreated(evictUnits[uid]))
}
return evictUnits, nil
}
// evictContainers evicts all containers that are evictable.
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
// Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil {
return err
}
// Remove deleted pod containers if all sources are ready.
if allSourcesReady {
for key, unit := range evictUnits {
if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key)
}
}
}
// Enforce max containers per evict unit.
if gcPolicy.MaxPerPodContainer >= 0 {
cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
}
// Enforce max total number of containers.
if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
// Leave an equal number of containers per evict unit (min: 1).
numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
if numContainersPerEvictUnit < 1 {
numContainersPerEvictUnit = 1
}
cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)
// If we still need to evict, evict oldest first.
numContainers := evictUnits.NumContainers()
if numContainers > gcPolicy.MaxContainers {
flattened := make([]containerGCInfo, 0, numContainers)
for key := range evictUnits {
flattened = append(flattened, evictUnits[key]...)
}
sort.Sort(byCreated(flattened))
cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
}
}
return nil
}
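// Illustrative sketch (added example, not part of the original source): the
// per-unit budget computed above when the global MaxContainers limit is
// exceeded. containersPerUnitBudget is a hypothetical helper: integer division
// with a floor of one container per evict unit, so every pod keeps something.
func containersPerUnitBudget(maxContainers, numEvictUnits int) int {
	if numEvictUnits == 0 {
		return 0 // nothing to budget; evictContainers never hits this case
	}
	budget := maxContainers / numEvictUnits
	if budget < 1 {
		budget = 1
	}
	return budget
}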
// evictSandboxes removes all evictable sandboxes. An evictable sandbox must
// meet all of the following requirements:
//   1. not in a ready state
//   2. contains no containers
//   3. belongs to a non-existent (i.e., already removed) pod, or is not the
//      most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
}
// collect the PodSandboxIds of all containers
sandboxIDs := sets.NewString()
for _, container := range containers {
sandboxIDs.Insert(container.PodSandboxId)
}
sandboxes, err := cgc.manager.getKubeletSandboxes(true)
if err != nil {
return err
}
sandboxesByPod := make(sandboxesByPodUID)
for _, sandbox := range sandboxes {
podUID := types.UID(sandbox.Metadata.Uid)
sandboxInfo := sandboxGCInfo{
id: sandbox.Id,
createTime: time.Unix(0, sandbox.CreatedAt),
}
// Set ready sandboxes to be active.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
sandboxInfo.active = true
}
// Set sandboxes that still have containers to be active.
if sandboxIDs.Has(sandbox.Id) {
sandboxInfo.active = true
}
sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
}
// Sort the sandboxes by age.
for uid := range sandboxesByPod {
sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
} else {
// Keep latest one if the pod still exists.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
}
}
return nil
}
// evictPodLogsDirectories evicts all evictable pod log directories. A pod log directory is
// evictable if its pod no longer exists.
func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
osInterface := cgc.manager.osInterface
if allSourcesReady {
// Only remove pod logs directories when all sources are ready.
dirs, err := osInterface.ReadDir(podLogsRootDirectory)
if err != nil {
return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
}
for _, dir := range dirs {
name := dir.Name()
podUID := types.UID(name)
if !cgc.podStateProvider.IsPodDeleted(podUID) {
continue
}
err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
if err != nil {
glog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
}
}
}
// Remove dead container log symlinks.
// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
for _, logSymlink := range logSymlinks {
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
err := osInterface.Remove(logSymlink)
if err != nil {
glog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
}
}
}
return nil
}
// GarbageCollect removes dead containers using the specified container gc policy.
// Note that the gc policy is not applied to sandboxes. Sandboxes are only removed when they are
// not ready and contain no containers.
//
// GarbageCollect consists of the following steps:
// * gets evictable containers which are not active and created more than gcPolicy.MinAge ago.
// * removes oldest dead containers for each pod by enforcing gcPolicy.MaxPerPodContainer.
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contain no containers.
// * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
return err
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
return err
}
// Remove pod sandbox log directory
return cgc.evictPodLogsDirectories(allSourcesReady)
}
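// Illustrative usage (added, not part of the original source): a policy with
// the same values as the default used in this package's tests. The numbers are
// examples only.
//
//	policy := kubecontainer.ContainerGCPolicy{
//		MinAge:             time.Hour, // ignore containers younger than this
//		MaxPerPodContainer: 2,         // dead containers kept per (pod UID, container name)
//		MaxContainers:      6,         // dead containers kept node-wide
//	}
//	err := cgc.GarbageCollect(policy, true /* allSourcesReady */, false /* evictTerminatedPods */)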


@ -0,0 +1,399 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"os"
"path/filepath"
"testing"
"time"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)
func TestSandboxGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodStateProvider bool, createdAt int64) sandboxTemplate {
if withPodStateProvider {
// initialize the pod getter
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return sandboxTemplate{
pod: pod,
state: state,
attempt: attempt,
createdAt: createdAt,
}
}
pods := []*v1.Pod{
makeTestPod("foo1", "new", "1234", []v1.Container{
makeTestContainer("bar1", "busybox"),
makeTestContainer("bar2", "busybox"),
}),
makeTestPod("foo2", "new", "5678", []v1.Container{
makeTestContainer("bar3", "busybox"),
}),
makeTestPod("deleted", "new", "9012", []v1.Container{
makeTestContainer("bar4", "busybox"),
}),
}
for c, test := range []struct {
description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers
minAge time.Duration // sandboxMinGCAge
remain []int // template indexes of remaining sandboxes
evictTerminatedPods bool
}{
{
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
},
containers: []containerTemplate{},
remain: []int{},
evictTerminatedPods: false,
},
{
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictTerminatedPods: false,
},
{
description: "sandboxes for existing pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
},
containers: []containerTemplate{},
remain: []int{0, 1},
evictTerminatedPods: false,
},
{
description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatedPods is set.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictTerminatedPods: true,
},
{
description: "sandbox with containers should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
},
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
evictTerminatedPods: false,
},
{
description: "multiple sandboxes should be handled properly.",
sandboxes: []sandboxTemplate{
// running sandbox.
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true, 1),
// exited sandbox without containers.
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
// exited sandbox with containers.
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1),
// exited sandbox without containers.
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
// exited sandbox without containers for deleted pods.
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
},
containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 2},
evictTerminatedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
fakeSandboxes := makeFakePodSandboxes(t, m, test.sandboxes)
fakeContainers := makeFakeContainers(t, m, test.containers)
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes(test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.PodSandboxStatus(fakeSandboxes[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeSandboxes[remain].PodSandboxStatus, status)
}
}
}
func TestContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
attempt: attempt,
createdAt: createdAt,
state: state,
}
}
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct {
description string // description of the test case
containers []containerTemplate // templates of containers
policy *kubecontainer.ContainerGCPolicy // container gc policy
remain []int // template indexes of remaining containers
evictTerminatedPods bool
}{
{
description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
evictTerminatedPods: false,
},
{
description: "max containers should be complied when no max per pod container limit is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
evictTerminatedPods: false,
},
{
description: "no containers should be removed if both max container and per pod container limits are not set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
},
{
description: "recently started containers should not be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
},
{
description: "oldest containers should be removed when per pod container limit exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
},
{
description: "running containers should not be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
},
{
description: "no containers should be removed when limits are not exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
},
{
description: "max container count should apply per (UID, container) pair",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatedPods: false,
},
{
description: "max limit should apply and try to keep from every pod",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8},
evictTerminatedPods: false,
},
{
description: "oldest pods should be removed if limit exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatedPods: false,
},
{
description: "all non-running containers should be removed when evictTerminatedPods is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{4, 5},
evictTerminatedPods: true,
},
{
description: "containers for deleted pods should be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
// deleted pods still respect MinAge.
makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
fakeContainers := makeFakeContainers(t, m, test.containers)
fakeRuntime.SetFakeContainers(fakeContainers)
if test.policy == nil {
test.policy = &defaultGCPolicy
}
err := m.containerGC.evictContainers(*test.policy, true, test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.ContainerStatus(fakeContainers[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeContainers[remain].ContainerStatus, status)
}
}
}
// Notice that legacy container symlink is not tested since it may be deprecated soon.
func TestPodLogDirectoryGC(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
fakeOS := m.osInterface.(*containertest.FakeOS)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
// pod log directories without corresponding pods should be removed.
podStateProvider.existingPods["123"] = struct{}{}
podStateProvider.existingPods["456"] = struct{}{}
podStateProvider.runningPods["123"] = struct{}{}
podStateProvider.runningPods["456"] = struct{}{}
files := []string{"123", "456", "789", "012"}
removed := []string{filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "012")}
ctrl := gomock.NewController(t)
defer ctrl.Finish()
fakeOS.ReadDirFn = func(string) ([]os.FileInfo, error) {
var fileInfos []os.FileInfo
for _, file := range files {
mockFI := containertest.NewMockFileInfo(ctrl)
mockFI.EXPECT().Name().Return(file)
fileInfos = append(fileInfos, mockFI)
}
return fileInfos, nil
}
// allSourcesReady == true, pod log directories without corresponding pod should be removed.
err = m.containerGC.evictPodLogsDirectories(true)
assert.NoError(t, err)
assert.Equal(t, removed, fakeOS.Removes)
// allSourcesReady == false, pod log directories should not be removed.
fakeOS.Removes = []string{}
err = m.containerGC.evictPodLogsDirectories(false)
assert.NoError(t, err)
assert.Empty(t, fakeOS.Removes)
}


@ -0,0 +1,148 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/pkg/credentialprovider"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/util/parsers"
)
// PullImage pulls an image from the network to local storage using the supplied
// secrets if necessary.
func (m *kubeGenericRuntimeManager) PullImage(image kubecontainer.ImageSpec, pullSecrets []v1.Secret) (string, error) {
img := image.Image
repoToPull, _, _, err := parsers.ParseImageName(img)
if err != nil {
return "", err
}
keyring, err := credentialprovider.MakeDockerKeyring(pullSecrets, m.keyring)
if err != nil {
return "", err
}
imgSpec := &runtimeapi.ImageSpec{Image: img}
creds, withCredentials := keyring.Lookup(repoToPull)
if !withCredentials {
glog.V(3).Infof("Pulling image %q without credentials", img)
imageRef, err := m.imageService.PullImage(imgSpec, nil)
if err != nil {
glog.Errorf("Pull image %q failed: %v", img, err)
return "", err
}
return imageRef, nil
}
var pullErrs []error
for _, currentCreds := range creds {
authConfig := credentialprovider.LazyProvide(currentCreds)
auth := &runtimeapi.AuthConfig{
Username: authConfig.Username,
Password: authConfig.Password,
Auth: authConfig.Auth,
ServerAddress: authConfig.ServerAddress,
IdentityToken: authConfig.IdentityToken,
RegistryToken: authConfig.RegistryToken,
}
imageRef, err := m.imageService.PullImage(imgSpec, auth)
// If there was no error, return success
if err == nil {
return imageRef, nil
}
pullErrs = append(pullErrs, err)
}
return "", utilerrors.NewAggregate(pullErrs)
}
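// examplePullImage is a minimal illustrative sketch, not called anywhere, of how a
// caller drives PullImage; the image name "busybox:1.28" and the nil pull secrets
// are assumptions made for the example.
func examplePullImage(m *kubeGenericRuntimeManager) {
	imageRef, err := m.PullImage(kubecontainer.ImageSpec{Image: "busybox:1.28"}, nil)
	if err != nil {
		glog.Errorf("example pull of %q failed: %v", "busybox:1.28", err)
		return
	}
	glog.V(4).Infof("example pull succeeded with image ref %q", imageRef)
}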
// GetImageRef gets the reference (digest or ID) of an image that is already present in
// local storage. It returns ("", nil) if the image isn't in local storage.
func (m *kubeGenericRuntimeManager) GetImageRef(image kubecontainer.ImageSpec) (string, error) {
status, err := m.imageService.ImageStatus(&runtimeapi.ImageSpec{Image: image.Image})
if err != nil {
glog.Errorf("ImageStatus for image %q failed: %v", image, err)
return "", err
}
if status == nil {
return "", nil
}
imageRef := status.Id
if len(status.RepoDigests) > 0 {
imageRef = status.RepoDigests[0]
}
return imageRef, nil
}
// ListImages gets all images currently on the machine.
func (m *kubeGenericRuntimeManager) ListImages() ([]kubecontainer.Image, error) {
var images []kubecontainer.Image
allImages, err := m.imageService.ListImages(nil)
if err != nil {
glog.Errorf("ListImages failed: %v", err)
return nil, err
}
for _, img := range allImages {
images = append(images, kubecontainer.Image{
ID: img.Id,
Size: int64(img.Size_),
RepoTags: img.RepoTags,
RepoDigests: img.RepoDigests,
})
}
return images, nil
}
// RemoveImage removes the specified image.
func (m *kubeGenericRuntimeManager) RemoveImage(image kubecontainer.ImageSpec) error {
err := m.imageService.RemoveImage(&runtimeapi.ImageSpec{Image: image.Image})
if err != nil {
glog.Errorf("Remove image %q failed: %v", image.Image, err)
return err
}
return nil
}
// ImageStats returns aggregated statistics of the images currently on the machine.
// Note that the current logic doesn't really work for images that share layers (e.g. docker images);
// this is a known issue, and we'll address it by getting imagefs stats directly from CRI.
// TODO: Get imagefs stats directly from CRI.
func (m *kubeGenericRuntimeManager) ImageStats() (*kubecontainer.ImageStats, error) {
allImages, err := m.imageService.ListImages(nil)
if err != nil {
glog.Errorf("ListImages failed: %v", err)
return nil, err
}
stats := &kubecontainer.ImageStats{}
for _, img := range allImages {
stats.TotalStorageBytes += img.Size_
}
return stats, nil
}
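// exampleImageFsBytes is an illustrative sketch of consuming ImageStats; it simply
// surfaces the aggregated TotalStorageBytes, subject to the shared-layer
// double-counting caveat noted above.
func exampleImageFsBytes(m *kubeGenericRuntimeManager) (uint64, error) {
	stats, err := m.ImageStats()
	if err != nil {
		return 0, err
	}
	return stats.TotalStorageBytes, nil
}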

View File

@ -0,0 +1,173 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/credentialprovider"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
func TestPullImage(t *testing.T) {
_, _, fakeManager, err := createTestRuntimeManager()
assert.NoError(t, err)
imageRef, err := fakeManager.PullImage(kubecontainer.ImageSpec{Image: "busybox"}, nil)
assert.NoError(t, err)
assert.Equal(t, "busybox", imageRef)
images, err := fakeManager.ListImages()
assert.NoError(t, err)
assert.Equal(t, 1, len(images))
assert.Equal(t, []string{"busybox"}, images[0].RepoTags)
}
func TestListImages(t *testing.T) {
_, fakeImageService, fakeManager, err := createTestRuntimeManager()
assert.NoError(t, err)
images := []string{"1111", "2222", "3333"}
expected := sets.NewString(images...)
fakeImageService.SetFakeImages(images)
actualImages, err := fakeManager.ListImages()
assert.NoError(t, err)
actual := sets.NewString()
for _, i := range actualImages {
actual.Insert(i.ID)
}
assert.Equal(t, expected.List(), actual.List())
}
func TestGetImageRef(t *testing.T) {
_, fakeImageService, fakeManager, err := createTestRuntimeManager()
assert.NoError(t, err)
image := "busybox"
fakeImageService.SetFakeImages([]string{image})
imageRef, err := fakeManager.GetImageRef(kubecontainer.ImageSpec{Image: image})
assert.NoError(t, err)
assert.Equal(t, image, imageRef)
}
func TestRemoveImage(t *testing.T) {
_, fakeImageService, fakeManager, err := createTestRuntimeManager()
assert.NoError(t, err)
_, err = fakeManager.PullImage(kubecontainer.ImageSpec{Image: "busybox"}, nil)
assert.NoError(t, err)
assert.Equal(t, 1, len(fakeImageService.Images))
err = fakeManager.RemoveImage(kubecontainer.ImageSpec{Image: "busybox"})
assert.NoError(t, err)
assert.Equal(t, 0, len(fakeImageService.Images))
}
func TestImageStats(t *testing.T) {
_, fakeImageService, fakeManager, err := createTestRuntimeManager()
assert.NoError(t, err)
const imageSize = 64
fakeImageService.SetFakeImageSize(imageSize)
images := []string{"1111", "2222", "3333"}
fakeImageService.SetFakeImages(images)
actualStats, err := fakeManager.ImageStats()
assert.NoError(t, err)
expectedStats := &kubecontainer.ImageStats{TotalStorageBytes: imageSize * uint64(len(images))}
assert.Equal(t, expectedStats, actualStats)
}
func TestPullWithSecrets(t *testing.T) {
// auth value is equivalent to: "username":"passed-user","password":"passed-password"
dockerCfg := map[string]map[string]string{"index.docker.io/v1/": {"email": "passed-email", "auth": "cGFzc2VkLXVzZXI6cGFzc2VkLXBhc3N3b3Jk"}}
dockercfgContent, err := json.Marshal(dockerCfg)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
dockerConfigJson := map[string]map[string]map[string]string{"auths": dockerCfg}
dockerConfigJsonContent, err := json.Marshal(dockerConfigJson)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
tests := map[string]struct {
imageName string
passedSecrets []v1.Secret
builtInDockerConfig credentialprovider.DockerConfig
expectedAuth *runtimeapi.AuthConfig
}{
"no matching secrets": {
"ubuntu",
[]v1.Secret{},
credentialprovider.DockerConfig(map[string]credentialprovider.DockerConfigEntry{}),
nil,
},
"default keyring secrets": {
"ubuntu",
[]v1.Secret{},
credentialprovider.DockerConfig(map[string]credentialprovider.DockerConfigEntry{
"index.docker.io/v1/": {Username: "built-in", Password: "password", Provider: nil},
}),
&runtimeapi.AuthConfig{Username: "built-in", Password: "password"},
},
"default keyring secrets unused": {
"ubuntu",
[]v1.Secret{},
credentialprovider.DockerConfig(map[string]credentialprovider.DockerConfigEntry{
"extraneous": {Username: "built-in", Password: "password", Provider: nil},
}),
nil,
},
"builtin keyring secrets, but use passed": {
"ubuntu",
[]v1.Secret{{Type: v1.SecretTypeDockercfg, Data: map[string][]byte{v1.DockerConfigKey: dockercfgContent}}},
credentialprovider.DockerConfig(map[string]credentialprovider.DockerConfigEntry{
"index.docker.io/v1/": {Username: "built-in", Password: "password", Provider: nil},
}),
&runtimeapi.AuthConfig{Username: "passed-user", Password: "passed-password"},
},
"builtin keyring secrets, but use passed with new docker config": {
"ubuntu",
[]v1.Secret{{Type: v1.SecretTypeDockerConfigJson, Data: map[string][]byte{v1.DockerConfigJsonKey: dockerConfigJsonContent}}},
credentialprovider.DockerConfig(map[string]credentialprovider.DockerConfigEntry{
"index.docker.io/v1/": {Username: "built-in", Password: "password", Provider: nil},
}),
&runtimeapi.AuthConfig{Username: "passed-user", Password: "passed-password"},
},
}
for description, test := range tests {
builtInKeyRing := &credentialprovider.BasicDockerKeyring{}
builtInKeyRing.Add(test.builtInDockerConfig)
_, fakeImageService, fakeManager, err := customTestRuntimeManager(builtInKeyRing)
require.NoError(t, err)
_, err = fakeManager.PullImage(kubecontainer.ImageSpec{Image: test.imageName}, test.passedSecrets)
require.NoError(t, err)
fakeImageService.AssertImagePulledWithAuth(t, &runtimeapi.ImageSpec{Image: test.imageName}, test.expectedAuth, description)
}
}

View File

@ -0,0 +1,35 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"io"
"time"
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs"
)
// ReadLogs reads the container log and redirects it into stdout and stderr.
// Note that containerID is only needed when following the log; otherwise,
// just pass in an empty string "".
func (m *kubeGenericRuntimeManager) ReadLogs(path, containerID string, apiOpts *v1.PodLogOptions, stdout, stderr io.Writer) error {
// Convert v1.PodLogOptions into internal log options.
opts := logs.NewLogOptions(apiOpts, time.Now())
return logs.ReadLogs(path, containerID, opts, m.runtimeService, stdout, stderr)
}
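// exampleTailLogs is a minimal illustrative sketch (the 100-line tail and the
// writers are assumptions) of building v1.PodLogOptions so that ReadLogs streams
// only the end of a container log.
func exampleTailLogs(m *kubeGenericRuntimeManager, path, containerID string, stdout, stderr io.Writer) error {
	tailLines := int64(100)
	apiOpts := &v1.PodLogOptions{TailLines: &tailLines}
	return m.ReadLogs(path, containerID, apiOpts, stdout, stderr)
}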

View File

@ -0,0 +1,925 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"errors"
"fmt"
"os"
"time"
"github.com/golang/glog"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/tools/record"
ref "k8s.io/client-go/tools/reference"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/credentialprovider"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/cache"
"k8s.io/kubernetes/pkg/kubelet/util/format"
utilversion "k8s.io/kubernetes/pkg/util/version"
)
const (
// The api version of kubelet runtime api
kubeRuntimeAPIVersion = "0.1.0"
// The root directory for pod logs
podLogsRootDirectory = "/var/log/pods"
// A minimal shutdown window for avoiding unnecessary SIGKILLs
minimumGracePeriodInSeconds = 2
// The expiration time of version cache.
versionCacheTTL = 60 * time.Second
)
var (
// ErrVersionNotSupported is returned when the api version of runtime interface is not supported
ErrVersionNotSupported = errors.New("Runtime api version is not supported")
)
// podStateProvider can determine if a pod is deleted or terminated
type podStateProvider interface {
IsPodDeleted(kubetypes.UID) bool
IsPodTerminated(kubetypes.UID) bool
}
type kubeGenericRuntimeManager struct {
runtimeName string
recorder record.EventRecorder
osInterface kubecontainer.OSInterface
containerRefManager *kubecontainer.RefManager
// machineInfo contains the machine information.
machineInfo *cadvisorapi.MachineInfo
// Container GC manager
containerGC *containerGC
// Keyring for pulling images
keyring credentialprovider.DockerKeyring
// Runner of lifecycle events.
runner kubecontainer.HandlerRunner
// RuntimeHelper that wraps kubelet to generate runtime container options.
runtimeHelper kubecontainer.RuntimeHelper
// Health check results.
livenessManager proberesults.Manager
// If true, enforce container cpu limits with CFS quota support
cpuCFSQuota bool
// wrapped image puller.
imagePuller images.ImageManager
// gRPC service clients
runtimeService internalapi.RuntimeService
imageService internalapi.ImageManagerService
// The version cache of runtime daemon.
versionCache *cache.ObjectCache
// The directory path for seccomp profiles.
seccompProfileRoot string
// Internal lifecycle event handlers for container resource management.
internalLifecycle cm.InternalContainerLifecycle
// A shim to legacy functions for backward compatibility.
legacyLogProvider LegacyLogProvider
}
type KubeGenericRuntime interface {
kubecontainer.Runtime
kubecontainer.IndirectStreamingRuntime
kubecontainer.ContainerCommandRunner
}
// LegacyLogProvider gives the ability to use unsupported docker log drivers (e.g. journald)
type LegacyLogProvider interface {
// Get the last few lines of the logs for a specific container.
GetContainerLogTail(uid kubetypes.UID, name, namespace string, containerID kubecontainer.ContainerID) (string, error)
}
// NewKubeGenericRuntimeManager creates a new kubeGenericRuntimeManager
func NewKubeGenericRuntimeManager(
recorder record.EventRecorder,
livenessManager proberesults.Manager,
seccompProfileRoot string,
containerRefManager *kubecontainer.RefManager,
machineInfo *cadvisorapi.MachineInfo,
podStateProvider podStateProvider,
osInterface kubecontainer.OSInterface,
runtimeHelper kubecontainer.RuntimeHelper,
httpClient types.HttpGetter,
imageBackOff *flowcontrol.Backoff,
serializeImagePulls bool,
imagePullQPS float32,
imagePullBurst int,
cpuCFSQuota bool,
runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService,
internalLifecycle cm.InternalContainerLifecycle,
legacyLogProvider LegacyLogProvider,
) (KubeGenericRuntime, error) {
kubeRuntimeManager := &kubeGenericRuntimeManager{
recorder: recorder,
cpuCFSQuota: cpuCFSQuota,
seccompProfileRoot: seccompProfileRoot,
livenessManager: livenessManager,
containerRefManager: containerRefManager,
machineInfo: machineInfo,
osInterface: osInterface,
runtimeHelper: runtimeHelper,
runtimeService: newInstrumentedRuntimeService(runtimeService),
imageService: newInstrumentedImageManagerService(imageService),
keyring: credentialprovider.NewDockerKeyring(),
internalLifecycle: internalLifecycle,
legacyLogProvider: legacyLogProvider,
}
typedVersion, err := kubeRuntimeManager.runtimeService.Version(kubeRuntimeAPIVersion)
if err != nil {
glog.Errorf("Get runtime version failed: %v", err)
return nil, err
}
// Only matching kubeRuntimeAPIVersion is supported now
// TODO: Runtime API machinery is under discussion at https://github.com/kubernetes/kubernetes/issues/28642
if typedVersion.Version != kubeRuntimeAPIVersion {
glog.Errorf("Runtime api version %s is not supported, only %s is supported now",
typedVersion.Version,
kubeRuntimeAPIVersion)
return nil, ErrVersionNotSupported
}
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
glog.Infof("Container runtime %s initialized, version: %s, apiVersion: %s",
typedVersion.RuntimeName,
typedVersion.RuntimeVersion,
typedVersion.RuntimeApiVersion)
// If the container logs directory does not exist, create it.
// TODO: create podLogsRootDirectory at kubelet.go when kubelet is refactored to
// new runtime interface
if _, err := osInterface.Stat(podLogsRootDirectory); os.IsNotExist(err) {
if err := osInterface.MkdirAll(podLogsRootDirectory, 0755); err != nil {
glog.Errorf("Failed to create directory %q: %v", podLogsRootDirectory, err)
}
}
kubeRuntimeManager.imagePuller = images.NewImageManager(
kubecontainer.FilterEventRecorder(recorder),
kubeRuntimeManager,
imageBackOff,
serializeImagePulls,
imagePullQPS,
imagePullBurst)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.versionCache = cache.NewObjectCache(
func() (interface{}, error) {
return kubeRuntimeManager.getTypedVersion()
},
versionCacheTTL,
)
return kubeRuntimeManager, nil
}
// Type returns the type of the container runtime.
func (m *kubeGenericRuntimeManager) Type() string {
return m.runtimeName
}
func newRuntimeVersion(version string) (*utilversion.Version, error) {
if ver, err := utilversion.ParseSemantic(version); err == nil {
return ver, err
}
return utilversion.ParseGeneric(version)
}
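// exampleParseRuntimeVersion is an illustrative sketch of newRuntimeVersion's
// fallback: a semantic version such as "1.12.0" parses directly, while a
// docker-style string such as "17.03.2-ce" (an assumed value) falls through to the
// generic parser.
func exampleParseRuntimeVersion() {
	for _, v := range []string{"1.12.0", "17.03.2-ce"} {
		if ver, err := newRuntimeVersion(v); err == nil {
			glog.V(4).Infof("example: parsed runtime version %q as %s", v, ver.String())
		}
	}
}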
func (m *kubeGenericRuntimeManager) getTypedVersion() (*runtimeapi.VersionResponse, error) {
typedVersion, err := m.runtimeService.Version(kubeRuntimeAPIVersion)
if err != nil {
glog.Errorf("Get remote runtime typed version failed: %v", err)
return nil, err
}
return typedVersion, nil
}
// Version returns the version information of the container runtime.
func (m *kubeGenericRuntimeManager) Version() (kubecontainer.Version, error) {
typedVersion, err := m.runtimeService.Version(kubeRuntimeAPIVersion)
if err != nil {
glog.Errorf("Get remote runtime version failed: %v", err)
return nil, err
}
return newRuntimeVersion(typedVersion.RuntimeVersion)
}
// APIVersion returns the cached API version information of the container
// runtime. Implementation is expected to update this cache periodically.
// This may be different from the runtime engine's version.
func (m *kubeGenericRuntimeManager) APIVersion() (kubecontainer.Version, error) {
versionObject, err := m.versionCache.Get(m.machineInfo.MachineID)
if err != nil {
return nil, err
}
typedVersion := versionObject.(*runtimeapi.VersionResponse)
return newRuntimeVersion(typedVersion.RuntimeApiVersion)
}
// Status returns the status of the runtime. An error is returned if the Status
// function itself fails, nil otherwise.
func (m *kubeGenericRuntimeManager) Status() (*kubecontainer.RuntimeStatus, error) {
status, err := m.runtimeService.Status()
if err != nil {
return nil, err
}
return toKubeRuntimeStatus(status), nil
}
// GetPods returns a list of containers grouped by pods. The boolean parameter
// specifies whether the runtime returns all containers including those already
// exited and dead containers (used for garbage collection).
func (m *kubeGenericRuntimeManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
pods := make(map[kubetypes.UID]*kubecontainer.Pod)
sandboxes, err := m.getKubeletSandboxes(all)
if err != nil {
return nil, err
}
for i := range sandboxes {
s := sandboxes[i]
if s.Metadata == nil {
glog.V(4).Infof("Sandbox does not have metadata: %+v", s)
continue
}
podUID := kubetypes.UID(s.Metadata.Uid)
if _, ok := pods[podUID]; !ok {
pods[podUID] = &kubecontainer.Pod{
ID: podUID,
Name: s.Metadata.Name,
Namespace: s.Metadata.Namespace,
}
}
p := pods[podUID]
converted, err := m.sandboxToKubeContainer(s)
if err != nil {
glog.V(4).Infof("Convert %q sandbox %v of pod %q failed: %v", m.runtimeName, s, podUID, err)
continue
}
p.Sandboxes = append(p.Sandboxes, converted)
}
containers, err := m.getKubeletContainers(all)
if err != nil {
return nil, err
}
for i := range containers {
c := containers[i]
if c.Metadata == nil {
glog.V(4).Infof("Container does not have metadata: %+v", c)
continue
}
labelledInfo := getContainerInfoFromLabels(c.Labels)
pod, found := pods[labelledInfo.PodUID]
if !found {
pod = &kubecontainer.Pod{
ID: labelledInfo.PodUID,
Name: labelledInfo.PodName,
Namespace: labelledInfo.PodNamespace,
}
pods[labelledInfo.PodUID] = pod
}
converted, err := m.toKubeContainer(c)
if err != nil {
glog.V(4).Infof("Convert %s container %v of pod %q failed: %v", m.runtimeName, c, labelledInfo.PodUID, err)
continue
}
pod.Containers = append(pod.Containers, converted)
}
// Convert map to list.
var result []*kubecontainer.Pod
for _, pod := range pods {
result = append(result, pod)
}
return result, nil
}
// containerToKillInfo contains necessary information to kill a container.
type containerToKillInfo struct {
// The spec of the container.
container *v1.Container
// The name of the container.
name string
// The message indicates why the container will be killed.
message string
}
// podActions keeps information about what to do for a pod.
type podActions struct {
// Stop all running (regular and init) containers and the sandbox for the pod.
KillPod bool
// Whether we need to create a new sandbox. If we need to kill the pod and
// create a new sandbox, all init containers need to be purged (i.e., removed).
CreateSandbox bool
// The id of existing sandbox. It is used for starting containers in ContainersToStart.
SandboxID string
// The attempt number of creating sandboxes for the pod.
Attempt uint32
// The next init container to start.
NextInitContainerToStart *v1.Container
// ContainersToStart keeps a list of indexes for the containers to start,
// where each index refers to the container's position in the pod spec
// (pod.Spec.Containers).
ContainersToStart []int
// ContainersToKill keeps a map of containers that need to be killed: the key is
// the container ID, and the value contains the information necessary to kill it.
ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo
}
// podSandboxChanged checks whether the pod's sandbox has changed and returns
// (changed, new attempt, original sandboxID if it exists).
func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
if len(podStatus.SandboxStatuses) == 0 {
glog.V(2).Infof("No sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
return true, 0, ""
}
readySandboxCount := 0
for _, s := range podStatus.SandboxStatuses {
if s.State == runtimeapi.PodSandboxState_SANDBOX_READY {
readySandboxCount++
}
}
// Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
sandboxStatus := podStatus.SandboxStatuses[0]
if readySandboxCount > 1 {
glog.V(2).Infof("More than 1 sandboxes for pod %q are ready. Need to reconcile them", format.Pod(pod))
return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}
if sandboxStatus.State != runtimeapi.PodSandboxState_SANDBOX_READY {
glog.V(2).Infof("No ready sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}
// Needs to create a new sandbox when network namespace changed.
if sandboxStatus.Linux != nil && sandboxStatus.Linux.Namespaces != nil && sandboxStatus.Linux.Namespaces.Options != nil &&
sandboxStatus.Linux.Namespaces.Options.HostNetwork != kubecontainer.IsHostNetworkPod(pod) {
glog.V(2).Infof("Sandbox for pod %q has changed. Need to start a new one", format.Pod(pod))
return true, sandboxStatus.Metadata.Attempt + 1, ""
}
// Needs to create a new sandbox when the sandbox does not have an IP address.
if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" {
glog.V(2).Infof("Sandbox for pod %q has no IP address. Need to start a new one", format.Pod(pod))
return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
}
return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id
}
func containerChanged(container *v1.Container, containerStatus *kubecontainer.ContainerStatus) (uint64, uint64, bool) {
expectedHash := kubecontainer.HashContainer(container)
return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
}
func shouldRestartOnFailure(pod *v1.Pod) bool {
return pod.Spec.RestartPolicy != v1.RestartPolicyNever
}
func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
cStatus := podStatus.FindContainerStatusByName(c.Name)
if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning {
return false
}
return cStatus.ExitCode == 0
}
// computePodActions checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)
createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
changes := podActions{
KillPod: createPodSandbox,
CreateSandbox: createPodSandbox,
SandboxID: sandboxID,
Attempt: attempt,
ContainersToStart: []int{},
ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo),
}
// If we need to (re-)create the pod sandbox, everything will need to be
// killed and recreated, and init containers should be purged.
if createPodSandbox {
if !shouldRestartOnFailure(pod) && attempt != 0 {
// Should not restart the pod, just return.
return changes
}
if len(pod.Spec.InitContainers) != 0 {
// Pod has init containers, return the first one.
changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
return changes
}
// Start all containers by default but exclude the ones that succeeded if
// RestartPolicy is OnFailure.
for idx, c := range pod.Spec.Containers {
if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
continue
}
changes.ContainersToStart = append(changes.ContainersToStart, idx)
}
return changes
}
// Check initialization progress.
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
if !done {
if next != nil {
initFailed := initLastStatus != nil && isContainerFailed(initLastStatus)
if initFailed && !shouldRestartOnFailure(pod) {
changes.KillPod = true
} else {
changes.NextInitContainerToStart = next
}
}
// Initialization failed or still in progress. Skip inspecting non-init
// containers.
return changes
}
// Number of running containers to keep.
keepCount := 0
// check the status of containers.
for idx, container := range pod.Spec.Containers {
containerStatus := podStatus.FindContainerStatusByName(container.Name)
// Call internal container post-stop lifecycle hook for any non-running container so that any
// allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
// to it.
if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
glog.Errorf("internal container post-stop lifecycle hook failed for container %v in pod %v with error %v",
container.Name, pod.Name, err)
}
}
// If container does not exist, or is not running, check whether we
// need to restart it.
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
glog.Info(message)
changes.ContainersToStart = append(changes.ContainersToStart, idx)
}
continue
}
// The container is running, but kill the container if any of the following conditions is met.
reason := ""
restart := shouldRestartOnFailure(pod)
if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed {
reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash)
// Restart regardless of the restart policy because the container
// spec changed.
restart = true
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
// If the container failed the liveness probe, we should kill it.
reason = "Container failed liveness probe."
} else {
// Keep the container.
keepCount++
continue
}
// We need to kill the container, but if we also want to restart the
// container afterwards, make the intent clear in the message. Also do
// not kill the entire pod since we expect the container to be running again eventually.
message := reason
if restart {
message = fmt.Sprintf("%s. Container will be killed and recreated.", message)
changes.ContainersToStart = append(changes.ContainersToStart, idx)
}
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: message,
}
glog.V(2).Infof("Container %q (%q) of pod %s: %s", container.Name, containerStatus.ID, format.Pod(pod), message)
}
if keepCount == 0 && len(changes.ContainersToStart) == 0 {
changes.KillPod = true
}
return changes
}
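// exampleLogPodActions is an illustrative sketch, not called by SyncPod, showing
// how the fields of the computed podActions are typically interpreted.
func exampleLogPodActions(changes podActions) {
	if changes.CreateSandbox {
		glog.V(4).Infof("example: sandbox will be created (attempt %d)", changes.Attempt)
	}
	if changes.KillPod {
		glog.V(4).Infof("example: the sandbox and all running containers will be stopped")
	}
	glog.V(4).Infof("example: %d container(s) to start, %d container(s) to kill",
		len(changes.ContainersToStart), len(changes.ContainersToKill))
}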
// SyncPod syncs the running pod into the desired pod by executing following steps:
//
// 1. Compute sandbox and container changes.
// 2. Kill pod sandbox if necessary.
// 3. Kill any containers that should not be running.
// 4. Create sandbox if necessary.
// 5. Create init containers.
// 6. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
// Step 1: Compute sandbox and container changes.
podContainerChanges := m.computePodActions(pod, podStatus)
glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
if podContainerChanges.CreateSandbox {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
if podContainerChanges.SandboxID != "" {
m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
} else {
glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
}
}
// Step 2: Kill the pod if the sandbox has changed.
if podContainerChanges.KillPod {
if !podContainerChanges.CreateSandbox {
glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
} else {
glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
}
killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
result.AddPodSyncResult(killResult)
if killResult.Error() != nil {
glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
return
}
if podContainerChanges.CreateSandbox {
m.purgeInitContainers(pod, podStatus)
}
} else {
// Step 3: kill any running containers in this pod which are not to keep.
for containerID, containerInfo := range podContainerChanges.ContainersToKill {
glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
result.AddSyncResult(killContainerResult)
if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
return
}
}
}
// Keep terminated init containers fairly aggressively controlled.
// This is an optimization, because container removals are typically handled
// by the container garbage collector.
m.pruneInitContainersBeforeStart(pod, podStatus)
// We pass the value of the podIP down to generatePodSandboxConfig and
// generateContainerConfig, which in turn passes it to various other
// functions, in order to facilitate functionality that requires this
// value (hosts file and downward API) and avoid races determining
// the pod IP in cases where a container requires restart but the
// podIP isn't in the status manager yet.
//
// We default to the IP in the passed-in pod status, and overwrite it if the
// sandbox needs to be (re)started.
podIP := ""
if podStatus != nil {
podIP = podStatus.IP
}
// Step 4: Create a sandbox for the pod if necessary.
podSandboxID := podContainerChanges.SandboxID
if podContainerChanges.CreateSandbox {
var msg string
var err error
glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
if err != nil {
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.")
return
}
glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))
podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
if err != nil {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
result.Fail(err)
return
}
// If we ever allow updating a pod from non-host-network to
// host-network, we may use a stale IP.
if !kubecontainer.IsHostNetworkPod(pod) {
// Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
}
}
// Get podSandboxConfig for containers to start.
configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
result.AddSyncResult(configPodSandboxResult)
podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
if err != nil {
message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
return
}
// Step 5: start the init container.
if container := podContainerChanges.NextInitContainerToStart; container != nil {
// Start the next init container.
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
result.AddSyncResult(startContainerResult)
isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
if isInBackOff {
startContainerResult.Fail(err, msg)
glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
return
}
glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
startContainerResult.Fail(err, msg)
utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
return
}
// Successfully started the init container.
glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
}
// Step 6: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart {
container := &pod.Spec.Containers[idx]
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
result.AddSyncResult(startContainerResult)
isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
if isInBackOff {
startContainerResult.Fail(err, msg)
glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
continue
}
glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
startContainerResult.Fail(err, msg)
// known errors that are logged in other places are logged at higher levels here to avoid
// repetitive log spam
switch {
case err == images.ErrImagePullBackOff:
glog.V(3).Infof("container start failed: %v: %s", err, msg)
default:
utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
}
continue
}
}
return
}
// doBackOff reports whether a container is still in its restart backoff window.
// If it is, the function returns a brief backoff error and a detailed error message.
func (m *kubeGenericRuntimeManager) doBackOff(pod *v1.Pod, container *v1.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, string, error) {
var cStatus *kubecontainer.ContainerStatus
for _, c := range podStatus.ContainerStatuses {
if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
cStatus = c
break
}
}
if cStatus == nil {
return false, "", nil
}
glog.Infof("checking backoff for container %q in pod %q", container.Name, format.Pod(pod))
// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
ts := cStatus.FinishedAt
// backOff requires a unique key to identify the container.
key := getStableKey(pod, container)
if backOff.IsInBackOffSince(key, ts) {
if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
m.recorder.Eventf(ref, v1.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed container")
}
err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod))
glog.Infof("%s", err.Error())
return true, err.Error(), kubecontainer.ErrCrashLoopBackOff
}
backOff.Next(key, ts)
return false, "", nil
}
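// exampleBackOffProbe is an illustrative sketch (the probe time is an assumption)
// of the flowcontrol.Backoff calls made by doBackOff: IsInBackOffSince checks
// whether the container is still inside its backoff window, and Next advances the
// window when a fresh start is attempted.
func exampleBackOffProbe(backOff *flowcontrol.Backoff, pod *v1.Pod, container *v1.Container) bool {
	key := getStableKey(pod, container)
	now := time.Now()
	if backOff.IsInBackOffSince(key, now) {
		return true
	}
	backOff.Next(key, now)
	return false
}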
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
// gracePeriodOverride if specified allows the caller to override the pod default grace period.
// only hard kill paths are allowed to specify a gracePeriodOverride in the kubelet in order to not corrupt user data.
// it is useful when doing SIGKILL for hard eviction scenarios, or max grace period during soft eviction scenarios.
func (m *kubeGenericRuntimeManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
err := m.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
return err.Error()
}
// killPodWithSyncResult kills a runningPod and returns SyncResult.
// Note: The pod passed in could be *nil* when the kubelet restarted.
func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
killContainerResults := m.killContainersWithSyncResult(pod, runningPod, gracePeriodOverride)
for _, containerResult := range killContainerResults {
result.AddSyncResult(containerResult)
}
// stop sandbox, the sandbox will be removed in GarbageCollect
killSandboxResult := kubecontainer.NewSyncResult(kubecontainer.KillPodSandbox, runningPod.ID)
result.AddSyncResult(killSandboxResult)
// Stop all sandboxes belonging to the same pod
for _, podSandbox := range runningPod.Sandboxes {
if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil {
killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
glog.Errorf("Failed to stop sandbox %q", podSandbox.ID)
}
}
return
}
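// exampleHardKillPod is an illustrative sketch (the zero-second override is an
// assumption) of the hard-eviction style kill described above: a nil pod plus a
// gracePeriodOverride of zero stops the containers without waiting.
func exampleHardKillPod(m *kubeGenericRuntimeManager, runningPod kubecontainer.Pod) error {
	gracePeriodOverride := int64(0)
	return m.KillPod(nil, runningPod, &gracePeriodOverride)
}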
// isHostNetwork checks whether the pod is running in host-network mode.
func (m *kubeGenericRuntimeManager) isHostNetwork(podSandBoxID string, pod *v1.Pod) (bool, error) {
if pod != nil {
return kubecontainer.IsHostNetworkPod(pod), nil
}
podStatus, err := m.runtimeService.PodSandboxStatus(podSandBoxID)
if err != nil {
return false, err
}
if nsOpts := podStatus.GetLinux().GetNamespaces().GetOptions(); nsOpts != nil {
return nsOpts.HostNetwork, nil
}
return false, nil
}
// GetPodStatus retrieves the status of the pod, including the
// information of all containers in the pod that are visible in Runtime.
func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
// Now we retain the restart count of a container as a container label. Each time a
// container restarts, the pod will read the restart count from the registered dead
// container, increment it to get the new restart count, and then add a label with
// the new restart count to the newly started container.
// However, there are some limitations of this method:
// 1. When all dead containers were garbage collected, the container status could
// not get the historical value and would be *inaccurate*. Fortunately, the chance
// is really slim.
// 2. When working with older containers that have no restart count label,
// we can only assume their restart count is 0.
// Anyhow, we only promised "best-effort" restart count reporting, so we can just
// ignore these limitations for now.
// TODO: move this comment to SyncPod.
podSandboxIDs, err := m.getSandboxIDByPodUID(uid, nil)
if err != nil {
return nil, err
}
podFullName := format.Pod(&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
UID: uid,
},
})
glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q", podSandboxIDs, podFullName)
sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs))
podIP := ""
for idx, podSandboxID := range podSandboxIDs {
podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
if err != nil {
glog.Errorf("PodSandboxStatus of sandbox %q for pod %q error: %v", podSandboxID, podFullName, err)
return nil, err
}
sandboxStatuses[idx] = podSandboxStatus
// Only get pod IP from latest sandbox
if idx == 0 && podSandboxStatus.State == runtimeapi.PodSandboxState_SANDBOX_READY {
podIP = m.determinePodSandboxIP(namespace, name, podSandboxStatus)
}
}
// Get statuses of all containers visible in the pod.
containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
if err != nil {
glog.Errorf("getPodContainerStatuses for pod %q failed: %v", podFullName, err)
return nil, err
}
return &kubecontainer.PodStatus{
ID: uid,
Name: name,
Namespace: namespace,
IP: podIP,
SandboxStatuses: sandboxStatuses,
ContainerStatuses: containerStatuses,
}, nil
}
// Returns the filesystem path of the pod's network namespace.
//
// For CRI, container network is handled by the runtime completely and this
// function should never be called.
func (m *kubeGenericRuntimeManager) GetNetNS(_ kubecontainer.ContainerID) (string, error) {
return "", fmt.Errorf("not supported")
}
// GarbageCollect removes dead containers using the specified container gc policy.
func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
}
// GetPodContainerID gets pod sandbox ID
func (m *kubeGenericRuntimeManager) GetPodContainerID(pod *kubecontainer.Pod) (kubecontainer.ContainerID, error) {
formattedPod := kubecontainer.FormatPod(pod)
if len(pod.Sandboxes) == 0 {
glog.Errorf("No sandboxes are found for pod %q", formattedPod)
return kubecontainer.ContainerID{}, fmt.Errorf("sandboxes for pod %q not found", formattedPod)
}
// return sandboxID of the first sandbox since it is the latest one
return pod.Sandboxes[0].ID, nil
}
// UpdatePodCIDR is just a passthrough method to update the runtimeConfig of the shim
// with the podCIDR supplied by the kubelet.
func (m *kubeGenericRuntimeManager) UpdatePodCIDR(podCIDR string) error {
// TODO(#35531): do we really want to write a method on this manager for each
// field of the config?
glog.Infof("updating runtime config through cri with podcidr %v", podCIDR)
return m.runtimeService.UpdateRuntimeConfig(
&runtimeapi.RuntimeConfig{
NetworkConfig: &runtimeapi.NetworkConfig{
PodCidr: podCIDR,
},
})
}
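// exampleUpdatePodCIDR is a minimal illustrative sketch (the CIDR is an assumed
// value) of the passthrough above: the pod CIDR is handed straight to the runtime
// shim via UpdateRuntimeConfig.
func exampleUpdatePodCIDR(m *kubeGenericRuntimeManager) error {
	return m.UpdatePodCIDR("10.244.1.0/24")
}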

File diff suppressed because it is too large

View File

@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"net"
"net/url"
"sort"
"github.com/golang/glog"
"k8s.io/api/core/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
if err != nil {
message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
return "", message, err
}
// Create pod logs directory
err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
if err != nil {
message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
return "", message, err
}
podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
if err != nil {
message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
return "", message, err
}
return podSandBoxID, "", nil
}
// generatePodSandboxConfig generates pod sandbox config from v1.Pod.
func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *v1.Pod, attempt uint32) (*runtimeapi.PodSandboxConfig, error) {
// TODO: deprecating podsandbox resource requirements in favor of the pod level cgroup
// Refer https://github.com/kubernetes/kubernetes/issues/29871
podUID := string(pod.UID)
podSandboxConfig := &runtimeapi.PodSandboxConfig{
Metadata: &runtimeapi.PodSandboxMetadata{
Name: pod.Name,
Namespace: pod.Namespace,
Uid: podUID,
Attempt: attempt,
},
Labels: newPodLabels(pod),
Annotations: newPodAnnotations(pod),
}
dnsConfig, err := m.runtimeHelper.GetPodDNS(pod)
if err != nil {
return nil, err
}
podSandboxConfig.DnsConfig = dnsConfig
if !kubecontainer.IsHostNetworkPod(pod) {
// TODO: Add domain support in new runtime interface
hostname, _, err := m.runtimeHelper.GeneratePodHostNameAndDomain(pod)
if err != nil {
return nil, err
}
podSandboxConfig.Hostname = hostname
}
logDir := buildPodLogsDirectory(pod.UID)
podSandboxConfig.LogDirectory = logDir
portMappings := []*runtimeapi.PortMapping{}
for _, c := range pod.Spec.Containers {
containerPortMappings := kubecontainer.MakePortMappings(&c)
for idx := range containerPortMappings {
port := containerPortMappings[idx]
hostPort := int32(port.HostPort)
containerPort := int32(port.ContainerPort)
protocol := toRuntimeProtocol(port.Protocol)
portMappings = append(portMappings, &runtimeapi.PortMapping{
HostIp: port.HostIP,
HostPort: hostPort,
ContainerPort: containerPort,
Protocol: protocol,
})
}
}
if len(portMappings) > 0 {
podSandboxConfig.PortMappings = portMappings
}
lc, err := m.generatePodSandboxLinuxConfig(pod)
if err != nil {
return nil, err
}
podSandboxConfig.Linux = lc
return podSandboxConfig, nil
}
// generatePodSandboxLinuxConfig generates LinuxPodSandboxConfig from v1.Pod.
func (m *kubeGenericRuntimeManager) generatePodSandboxLinuxConfig(pod *v1.Pod) (*runtimeapi.LinuxPodSandboxConfig, error) {
cgroupParent := m.runtimeHelper.GetPodCgroupParent(pod)
lc := &runtimeapi.LinuxPodSandboxConfig{
CgroupParent: cgroupParent,
SecurityContext: &runtimeapi.LinuxSandboxSecurityContext{
Privileged: kubecontainer.HasPrivilegedContainer(pod),
SeccompProfilePath: m.getSeccompProfileFromAnnotations(pod.Annotations, ""),
},
}
sysctls, err := getSysctlsFromAnnotations(pod.Annotations)
if err != nil {
return nil, fmt.Errorf("failed to get sysctls from annotations %v for pod %q: %v", pod.Annotations, format.Pod(pod), err)
}
lc.Sysctls = sysctls
if pod.Spec.SecurityContext != nil {
sc := pod.Spec.SecurityContext
if sc.RunAsUser != nil {
lc.SecurityContext.RunAsUser = &runtimeapi.Int64Value{Value: int64(*sc.RunAsUser)}
}
lc.SecurityContext.NamespaceOptions = &runtimeapi.NamespaceOption{
HostNetwork: pod.Spec.HostNetwork,
HostIpc: pod.Spec.HostIPC,
HostPid: pod.Spec.HostPID,
}
if sc.FSGroup != nil {
lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, int64(*sc.FSGroup))
}
if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 {
lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, groups...)
}
if sc.SupplementalGroups != nil {
for _, sg := range sc.SupplementalGroups {
lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, int64(sg))
}
}
if sc.SELinuxOptions != nil {
lc.SecurityContext.SelinuxOptions = &runtimeapi.SELinuxOption{
User: sc.SELinuxOptions.User,
Role: sc.SELinuxOptions.Role,
Type: sc.SELinuxOptions.Type,
Level: sc.SELinuxOptions.Level,
}
}
}
return lc, nil
}
// getKubeletSandboxes lists all (or just the running) sandboxes managed by kubelet.
func (m *kubeGenericRuntimeManager) getKubeletSandboxes(all bool) ([]*runtimeapi.PodSandbox, error) {
var filter *runtimeapi.PodSandboxFilter
if !all {
readyState := runtimeapi.PodSandboxState_SANDBOX_READY
filter = &runtimeapi.PodSandboxFilter{
State: &runtimeapi.PodSandboxStateValue{
State: readyState,
},
}
}
resp, err := m.runtimeService.ListPodSandbox(filter)
if err != nil {
glog.Errorf("ListPodSandbox failed: %v", err)
return nil, err
}
return resp, nil
}
// determinePodSandboxIP determines the IP address of the given pod sandbox.
func (m *kubeGenericRuntimeManager) determinePodSandboxIP(podNamespace, podName string, podSandbox *runtimeapi.PodSandboxStatus) string {
if podSandbox.Network == nil {
glog.Warningf("Pod Sandbox status doesn't have network information, cannot report IP")
return ""
}
// ip could be an empty string if the runtime is not responsible for the
// IP (e.g., host networking), so only warn about a non-empty IP that
// fails to parse.
ip := podSandbox.Network.Ip
if len(ip) != 0 && net.ParseIP(ip) == nil {
glog.Warningf("Pod Sandbox reported an unparseable IP %v", ip)
return ""
}
return ip
}
// getSandboxIDByPodUID gets sandbox IDs by podUID and returns ([]sandboxID, error).
// Param state could be nil in order to get all sandboxes belonging to the same pod.
func (m *kubeGenericRuntimeManager) getSandboxIDByPodUID(podUID kubetypes.UID, state *runtimeapi.PodSandboxState) ([]string, error) {
filter := &runtimeapi.PodSandboxFilter{
LabelSelector: map[string]string{types.KubernetesPodUIDLabel: string(podUID)},
}
if state != nil {
filter.State = &runtimeapi.PodSandboxStateValue{
State: *state,
}
}
sandboxes, err := m.runtimeService.ListPodSandbox(filter)
if err != nil {
glog.Errorf("ListPodSandbox with pod UID %q failed: %v", podUID, err)
return nil, err
}
if len(sandboxes) == 0 {
return nil, nil
}
// Sort with newest first.
sandboxIDs := make([]string, len(sandboxes))
sort.Sort(podSandboxByCreated(sandboxes))
for i, s := range sandboxes {
sandboxIDs[i] = s.Id
}
return sandboxIDs, nil
}
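// exampleLatestReadySandboxID is an illustrative sketch that narrows the lookup
// above to ready sandboxes only and takes the first (newest) ID; returning "" when
// no ready sandbox exists is a choice made for the example.
func exampleLatestReadySandboxID(m *kubeGenericRuntimeManager, podUID kubetypes.UID) (string, error) {
	readyState := runtimeapi.PodSandboxState_SANDBOX_READY
	sandboxIDs, err := m.getSandboxIDByPodUID(podUID, &readyState)
	if err != nil || len(sandboxIDs) == 0 {
		return "", err
	}
	return sandboxIDs[0], nil
}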
// GetPortForward gets the endpoint the runtime will serve the port-forward request from.
func (m *kubeGenericRuntimeManager) GetPortForward(podName, podNamespace string, podUID kubetypes.UID, ports []int32) (*url.URL, error) {
sandboxIDs, err := m.getSandboxIDByPodUID(podUID, nil)
if err != nil {
return nil, fmt.Errorf("failed to find sandboxID for pod %s: %v", format.PodDesc(podName, podNamespace, podUID), err)
}
if len(sandboxIDs) == 0 {
return nil, fmt.Errorf("failed to find sandboxID for pod %s", format.PodDesc(podName, podNamespace, podUID))
}
req := &runtimeapi.PortForwardRequest{
PodSandboxId: sandboxIDs[0],
Port: ports,
}
resp, err := m.runtimeService.PortForward(req)
if err != nil {
return nil, err
}
return url.Parse(resp.Url)
}

View File

@ -0,0 +1,65 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)
// TestCreatePodSandbox tests creating a sandbox and its corresponding pod log directory.
func TestCreatePodSandbox(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
},
},
}
fakeOS := m.osInterface.(*containertest.FakeOS)
fakeOS.MkdirAllFn = func(path string, perm os.FileMode) error {
// Check pod logs root directory is created.
assert.Equal(t, filepath.Join(podLogsRootDirectory, "12345678"), path)
assert.Equal(t, os.FileMode(0755), perm)
return nil
}
id, _, err := m.createPodSandbox(pod, 1)
assert.NoError(t, err)
assert.Contains(t, fakeRuntime.Called, "RunPodSandbox")
sandboxes, err := fakeRuntime.ListPodSandbox(&runtimeapi.PodSandboxFilter{Id: id})
assert.NoError(t, err)
assert.Equal(t, 1, len(sandboxes))
// TODO Check pod sandbox configuration
}

View File

@ -0,0 +1,278 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"encoding/json"
"strconv"
"github.com/golang/glog"
"k8s.io/api/core/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
const (
// TODO: change those label names to follow kubernetes's format
podDeletionGracePeriodLabel = "io.kubernetes.pod.deletionGracePeriod"
podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
containerHashLabel = "io.kubernetes.container.hash"
containerRestartCountLabel = "io.kubernetes.container.restartCount"
containerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath"
containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy"
containerPreStopHandlerLabel = "io.kubernetes.container.preStopHandler"
containerPortsLabel = "io.kubernetes.container.ports"
)
type labeledPodSandboxInfo struct {
// Labels from v1.Pod
Labels map[string]string
PodName string
PodNamespace string
PodUID kubetypes.UID
}
type annotatedPodSandboxInfo struct {
// Annotations from v1.Pod
Annotations map[string]string
}
type labeledContainerInfo struct {
ContainerName string
PodName string
PodNamespace string
PodUID kubetypes.UID
}
type annotatedContainerInfo struct {
Hash uint64
RestartCount int
PodDeletionGracePeriod *int64
PodTerminationGracePeriod *int64
TerminationMessagePath string
TerminationMessagePolicy v1.TerminationMessagePolicy
PreStopHandler *v1.Handler
ContainerPorts []v1.ContainerPort
}
// newPodLabels creates pod labels from v1.Pod.
func newPodLabels(pod *v1.Pod) map[string]string {
labels := map[string]string{}
// Get labels from v1.Pod
for k, v := range pod.Labels {
labels[k] = v
}
labels[types.KubernetesPodNameLabel] = pod.Name
labels[types.KubernetesPodNamespaceLabel] = pod.Namespace
labels[types.KubernetesPodUIDLabel] = string(pod.UID)
return labels
}
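// exampleNewPodLabels is an illustrative sketch (the pod identity and the "app"
// label are assumed values) of the map newPodLabels produces: the pod's own labels
// plus the three kubelet-reserved identity labels.
func exampleNewPodLabels() map[string]string {
	pod := &v1.Pod{}
	pod.Name = "foo"
	pod.Namespace = "bar"
	pod.UID = kubetypes.UID("12345678")
	pod.Labels = map[string]string{"app": "web"}
	// The result carries "app" through unchanged and adds types.KubernetesPodNameLabel,
	// types.KubernetesPodNamespaceLabel, and types.KubernetesPodUIDLabel.
	return newPodLabels(pod)
}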
// newPodAnnotations creates pod annotations from v1.Pod.
func newPodAnnotations(pod *v1.Pod) map[string]string {
return pod.Annotations
}
// newContainerLabels creates container labels from v1.Container and v1.Pod.
func newContainerLabels(container *v1.Container, pod *v1.Pod) map[string]string {
labels := map[string]string{}
labels[types.KubernetesPodNameLabel] = pod.Name
labels[types.KubernetesPodNamespaceLabel] = pod.Namespace
labels[types.KubernetesPodUIDLabel] = string(pod.UID)
labels[types.KubernetesContainerNameLabel] = container.Name
return labels
}
// newContainerAnnotations creates container annotations from v1.Container and v1.Pod.
func newContainerAnnotations(container *v1.Container, pod *v1.Pod, restartCount int) map[string]string {
annotations := map[string]string{}
annotations[containerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
annotations[containerRestartCountLabel] = strconv.Itoa(restartCount)
annotations[containerTerminationMessagePathLabel] = container.TerminationMessagePath
annotations[containerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy)
if pod.DeletionGracePeriodSeconds != nil {
annotations[podDeletionGracePeriodLabel] = strconv.FormatInt(*pod.DeletionGracePeriodSeconds, 10)
}
if pod.Spec.TerminationGracePeriodSeconds != nil {
annotations[podTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
}
if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
// Use JSON encoding so that the PreStop handler object is readable after it is written as an annotation
rawPreStop, err := json.Marshal(container.Lifecycle.PreStop)
if err != nil {
glog.Errorf("Unable to marshal lifecycle PreStop handler for container %q of pod %q: %v", container.Name, format.Pod(pod), err)
} else {
annotations[containerPreStopHandlerLabel] = string(rawPreStop)
}
}
if len(container.Ports) > 0 {
rawContainerPorts, err := json.Marshal(container.Ports)
if err != nil {
glog.Errorf("Unable to marshal container ports for container %q for pod %q: %v", container.Name, format.Pod(pod), err)
} else {
annotations[containerPortsLabel] = string(rawContainerPorts)
}
}
return annotations
}
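// Illustrative sketch, not part of the original file: for a container with
// restartCount 2, terminationMessagePath "/dev/termination-log", and a pod
// deletion grace period of 30 seconds, the returned map would contain
// entries along these lines (the hash value here is hypothetical):
//
//	"io.kubernetes.container.hash":                   "5f7d3a1c"
//	"io.kubernetes.container.restartCount":           "2"
//	"io.kubernetes.container.terminationMessagePath": "/dev/termination-log"
//	"io.kubernetes.pod.deletionGracePeriod":          "30"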
// getPodSandboxInfoFromLabels gets labeledPodSandboxInfo from labels.
func getPodSandboxInfoFromLabels(labels map[string]string) *labeledPodSandboxInfo {
podSandboxInfo := &labeledPodSandboxInfo{
Labels: make(map[string]string),
PodName: getStringValueFromLabel(labels, types.KubernetesPodNameLabel),
PodNamespace: getStringValueFromLabel(labels, types.KubernetesPodNamespaceLabel),
PodUID: kubetypes.UID(getStringValueFromLabel(labels, types.KubernetesPodUIDLabel)),
}
// Keep only the labels that originate from v1.Pod
for k, v := range labels {
if k != types.KubernetesPodNameLabel && k != types.KubernetesPodNamespaceLabel && k != types.KubernetesPodUIDLabel {
podSandboxInfo.Labels[k] = v
}
}
return podSandboxInfo
}
// getPodSandboxInfoFromAnnotations gets annotatedPodSandboxInfo from annotations.
func getPodSandboxInfoFromAnnotations(annotations map[string]string) *annotatedPodSandboxInfo {
return &annotatedPodSandboxInfo{
Annotations: annotations,
}
}
// getContainerInfoFromLabels gets labeledContainerInfo from labels.
func getContainerInfoFromLabels(labels map[string]string) *labeledContainerInfo {
return &labeledContainerInfo{
PodName: getStringValueFromLabel(labels, types.KubernetesPodNameLabel),
PodNamespace: getStringValueFromLabel(labels, types.KubernetesPodNamespaceLabel),
PodUID: kubetypes.UID(getStringValueFromLabel(labels, types.KubernetesPodUIDLabel)),
ContainerName: getStringValueFromLabel(labels, types.KubernetesContainerNameLabel),
}
}
// getContainerInfoFromAnnotations gets annotatedContainerInfo from annotations.
func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedContainerInfo {
var err error
containerInfo := &annotatedContainerInfo{
TerminationMessagePath: getStringValueFromLabel(annotations, containerTerminationMessagePathLabel),
TerminationMessagePolicy: v1.TerminationMessagePolicy(getStringValueFromLabel(annotations, containerTerminationMessagePolicyLabel)),
}
if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", containerHashLabel, annotations, err)
}
if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", containerRestartCountLabel, annotations, err)
}
if containerInfo.PodDeletionGracePeriod, err = getInt64PointerFromLabel(annotations, podDeletionGracePeriodLabel); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", podDeletionGracePeriodLabel, annotations, err)
}
if containerInfo.PodTerminationGracePeriod, err = getInt64PointerFromLabel(annotations, podTerminationGracePeriodLabel); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", podTerminationGracePeriodLabel, annotations, err)
}
preStopHandler := &v1.Handler{}
if found, err := getJSONObjectFromLabel(annotations, containerPreStopHandlerLabel, preStopHandler); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", containerPreStopHandlerLabel, annotations, err)
} else if found {
containerInfo.PreStopHandler = preStopHandler
}
containerPorts := []v1.ContainerPort{}
if found, err := getJSONObjectFromLabel(annotations, containerPortsLabel, &containerPorts); err != nil {
glog.Errorf("Unable to get %q from annotations %q: %v", containerPortsLabel, annotations, err)
} else if found {
containerInfo.ContainerPorts = containerPorts
}
return containerInfo
}
func getStringValueFromLabel(labels map[string]string, label string) string {
if value, found := labels[label]; found {
return value
}
// Do not report an error, because many old containers may not carry this label.
glog.V(3).Infof("Container doesn't have label %s, it may be an old or invalid container", label)
// Return an empty string for these containers; the caller will obtain the value by other means.
return ""
}
func getIntValueFromLabel(labels map[string]string, label string) (int, error) {
if strValue, found := labels[label]; found {
intValue, err := strconv.Atoi(strValue)
if err != nil {
// This should not happen; fall back to 0 for this abnormal case
return 0, err
}
return intValue, nil
}
// Do not report an error, because many old containers may not carry this label.
glog.V(3).Infof("Container doesn't have label %s, it may be an old or invalid container", label)
// Default the value to 0.
return 0, nil
}
func getUint64ValueFromLabel(labels map[string]string, label string) (uint64, error) {
if strValue, found := labels[label]; found {
intValue, err := strconv.ParseUint(strValue, 16, 64)
if err != nil {
// This should not happen; fall back to 0 for this abnormal case
return 0, err
}
return intValue, nil
}
// Do not report an error, because many old containers may not carry this label.
glog.V(3).Infof("Container doesn't have label %s, it may be an old or invalid container", label)
// Default the value to 0.
return 0, nil
}
func getInt64PointerFromLabel(labels map[string]string, label string) (*int64, error) {
if strValue, found := labels[label]; found {
int64Value, err := strconv.ParseInt(strValue, 10, 64)
if err != nil {
return nil, err
}
return &int64Value, nil
}
// If the label is not found, return a nil pointer.
return nil, nil
}
// getJSONObjectFromLabel returns a bool value indicating whether an object is found.
func getJSONObjectFromLabel(labels map[string]string, label string, value interface{}) (bool, error) {
if strValue, found := labels[label]; found {
err := json.Unmarshal([]byte(strValue), value)
return found, err
}
// If the label is not found, report it as not found.
return false, nil
}
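// Illustrative sketch, not part of the original file: the getters above are
// intended as inverses of the constructors earlier in this file, e.g.
//
//	info := getContainerInfoFromLabels(newContainerLabels(container, pod))
//
// recovers the pod name, namespace, UID, and container name that were
// stored, as exercised by the round-trip tests in labels_test.go.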

217
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/labels_test.go vendored Normal file
View File

@ -0,0 +1,217 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
func TestContainerLabels(t *testing.T) {
deletionGracePeriod := int64(10)
terminationGracePeriod := int64(10)
lifecycle := &v1.Lifecycle{
// PostStart is deliberately left nil
PreStop: &v1.Handler{
Exec: &v1.ExecAction{
Command: []string{"action1", "action2"},
},
HTTPGet: &v1.HTTPGetAction{
Path: "path",
Host: "host",
Port: intstr.FromInt(8080),
Scheme: "scheme",
},
TCPSocket: &v1.TCPSocketAction{
Port: intstr.FromString("80"),
},
},
}
container := &v1.Container{
Name: "test_container",
TerminationMessagePath: "/somepath",
Lifecycle: lifecycle,
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test_pod_namespace",
UID: "test_pod_uid",
DeletionGracePeriodSeconds: &deletionGracePeriod,
},
Spec: v1.PodSpec{
Containers: []v1.Container{*container},
TerminationGracePeriodSeconds: &terminationGracePeriod,
},
}
expected := &labeledContainerInfo{
PodName: pod.Name,
PodNamespace: pod.Namespace,
PodUID: pod.UID,
ContainerName: container.Name,
}
// Test whether we can get the right information from the labels
labels := newContainerLabels(container, pod)
containerInfo := getContainerInfoFromLabels(labels)
if !reflect.DeepEqual(containerInfo, expected) {
t.Errorf("expected %v, got %v", expected, containerInfo)
}
}
func TestContainerAnnotations(t *testing.T) {
restartCount := 5
deletionGracePeriod := int64(10)
terminationGracePeriod := int64(10)
lifecycle := &v1.Lifecycle{
// PostStart is deliberately left nil
PreStop: &v1.Handler{
Exec: &v1.ExecAction{
Command: []string{"action1", "action2"},
},
HTTPGet: &v1.HTTPGetAction{
Path: "path",
Host: "host",
Port: intstr.FromInt(8080),
Scheme: "scheme",
},
TCPSocket: &v1.TCPSocketAction{
Port: intstr.FromString("80"),
},
},
}
containerPorts := []v1.ContainerPort{
{
Name: "http",
HostPort: 80,
ContainerPort: 8080,
Protocol: v1.ProtocolTCP,
},
{
Name: "https",
HostPort: 443,
ContainerPort: 6443,
Protocol: v1.ProtocolTCP,
},
}
container := &v1.Container{
Name: "test_container",
Ports: containerPorts,
TerminationMessagePath: "/somepath",
Lifecycle: lifecycle,
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test_pod_namespace",
UID: "test_pod_uid",
DeletionGracePeriodSeconds: &deletionGracePeriod,
},
Spec: v1.PodSpec{
Containers: []v1.Container{*container},
TerminationGracePeriodSeconds: &terminationGracePeriod,
},
}
expected := &annotatedContainerInfo{
ContainerPorts: containerPorts,
PodDeletionGracePeriod: pod.DeletionGracePeriodSeconds,
PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds,
Hash: kubecontainer.HashContainer(container),
RestartCount: restartCount,
TerminationMessagePath: container.TerminationMessagePath,
PreStopHandler: container.Lifecycle.PreStop,
}
// Test whether we can get the right information from the annotations
annotations := newContainerAnnotations(container, pod, restartCount)
containerInfo := getContainerInfoFromAnnotations(annotations)
if !reflect.DeepEqual(containerInfo, expected) {
t.Errorf("expected %v, got %v", expected, containerInfo)
}
// Test that when DeletionGracePeriodSeconds, TerminationGracePeriodSeconds, and Lifecycle are nil,
// the information retrieved from the annotations is also nil
container.Lifecycle = nil
pod.DeletionGracePeriodSeconds = nil
pod.Spec.TerminationGracePeriodSeconds = nil
expected.PodDeletionGracePeriod = nil
expected.PodTerminationGracePeriod = nil
expected.PreStopHandler = nil
// Because the container has changed, the hash should be updated
expected.Hash = kubecontainer.HashContainer(container)
annotations = newContainerAnnotations(container, pod, restartCount)
containerInfo = getContainerInfoFromAnnotations(annotations)
if !reflect.DeepEqual(containerInfo, expected) {
t.Errorf("expected %v, got %v", expected, containerInfo)
}
}
func TestPodLabels(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test_pod_namespace",
UID: "test_pod_uid",
Labels: map[string]string{"foo": "bar"},
},
Spec: v1.PodSpec{
Containers: []v1.Container{},
},
}
expected := &labeledPodSandboxInfo{
Labels: pod.Labels,
PodName: pod.Name,
PodNamespace: pod.Namespace,
PodUID: pod.UID,
}
// Test whether we can get the right information from the labels
labels := newPodLabels(pod)
podSandboxInfo := getPodSandboxInfoFromLabels(labels)
if !reflect.DeepEqual(podSandboxInfo, expected) {
t.Errorf("expected %v, got %v", expected, podSandboxInfo)
}
}
func TestPodAnnotations(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test_pod_namespace",
UID: "test_pod_uid",
Annotations: map[string]string{"foo": "bar"},
},
Spec: v1.PodSpec{
Containers: []v1.Container{},
},
}
expected := &annotatedPodSandboxInfo{
Annotations: map[string]string{"foo": "bar"},
}
// Test whether we can get the right information from the annotations
annotations := newPodAnnotations(pod)
podSandboxInfo := getPodSandboxInfoFromAnnotations(annotations)
if !reflect.DeepEqual(podSandboxInfo, expected) {
t.Errorf("expected %v, got %v", expected, podSandboxInfo)
}
}

55
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy.go vendored Normal file
View File

@ -0,0 +1,55 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"path"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// This file implements the functions that are needed for backward
// compatibility. Therefore, it imports various kubernetes packages
// directly.
const (
// legacyContainerLogsDir is the legacy location of container logs. It is the same as
// kubelet.containerLogsDir.
legacyContainerLogsDir = "/var/log/containers"
// legacyLogSuffix is the legacy log suffix.
legacyLogSuffix = "log"
ext4MaxFileNameLen = 255
)
// legacyLogSymlink composes the legacy container log path. It is only used for legacy cluster
// logging support.
func legacyLogSymlink(containerID string, containerName, podName, podNamespace string) string {
return logSymlink(legacyContainerLogsDir, kubecontainer.BuildPodFullName(podName, podNamespace),
containerName, containerID)
}
func logSymlink(containerLogsDir, podFullName, containerName, dockerId string) string {
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, dockerId)
// Length of a filename cannot exceed 255 characters in ext4 on Linux.
if len(logPath) > ext4MaxFileNameLen-len(suffix) {
logPath = logPath[:ext4MaxFileNameLen-len(suffix)]
}
return path.Join(containerLogsDir, logPath+suffix)
}
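// Illustrative sketch, not part of the original file: for a hypothetical
// container "nginx" with ID "abc123" in pod "web-1" of namespace "default",
//
//	legacyLogSymlink("abc123", "nginx", "web-1", "default")
//
// returns "/var/log/containers/web-1_default_nginx-abc123.log"; the
// "<podFullName>_<containerName>-<dockerId>" prefix is truncated only when
// it would push the file name past ext4's 255-character limit.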

47
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy_test.go vendored Normal file
View File

@ -0,0 +1,47 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"math/rand"
"path"
"testing"
"github.com/stretchr/testify/assert"
)
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
func randStringBytes(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Intn(len(letterBytes))]
}
return string(b)
}
func TestLogSymLink(t *testing.T) {
as := assert.New(t)
containerLogsDir := "/foo/bar"
podFullName := randStringBytes(128)
containerName := randStringBytes(70)
dockerId := randStringBytes(80)
// The file name cannot exceed 255 characters. Since the .log suffix is required, the prefix cannot exceed 251 characters.
expectedPath := path.Join(containerLogsDir, fmt.Sprintf("%s_%s-%s", podFullName, containerName, dockerId)[:251]+".log")
as.Equal(expectedPath, logSymlink(containerLogsDir, podFullName, containerName, dockerId))
}

44
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs/BUILD generated vendored Normal file
View File

@ -0,0 +1,44 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["logs.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs",
visibility = ["//visibility:public"],
deps = [
"//pkg/kubelet/apis/cri:go_default_library",
"//pkg/kubelet/apis/cri/v1alpha1/runtime:go_default_library",
"//pkg/util/tail:go_default_library",
"//vendor/github.com/docker/docker/pkg/jsonlog:go_default_library",
"//vendor/github.com/fsnotify/fsnotify:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["logs_test.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs",
library = ":go_default_library",
deps = [
"//pkg/kubelet/apis/cri/v1alpha1/runtime:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

390
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs/logs.go vendored Normal file
View File

@ -0,0 +1,390 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package logs
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"os"
"time"
"github.com/docker/docker/pkg/jsonlog"
"github.com/fsnotify/fsnotify"
"github.com/golang/glog"
"k8s.io/api/core/v1"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/util/tail"
)
// Notice that the current CRI logs implementation doesn't handle
// log rotation.
// * It will not retrieve logs from rotated log files.
// * If log rotation happens while following the log:
//   * If the rotation uses create mode, we'll still follow the old file.
//   * If the rotation uses copytruncate, we'll be reading at the original position and get nothing.
// TODO(random-liu): Support log rotation.
const (
// timeFormat is the time format used in the log.
timeFormat = time.RFC3339Nano
// blockSize is the block size used in tail.
blockSize = 1024
// stateCheckPeriod is the period to check container state while following
// the container log. Kubelet should not keep following the log when the
// container is not running.
stateCheckPeriod = 5 * time.Second
)
var (
// eol is the end-of-line sign in the log.
eol = []byte{'\n'}
// delimiter is the delimiter for timestamp and stream type in log line.
delimiter = []byte{' '}
// tagDelimiter is the delimiter for log tags.
tagDelimiter = []byte(runtimeapi.LogTagDelimiter)
)
// logMessage is the CRI internal log type.
type logMessage struct {
timestamp time.Time
stream runtimeapi.LogStreamType
log []byte
}
// reset resets the log to nil.
func (l *logMessage) reset() {
l.timestamp = time.Time{}
l.stream = ""
l.log = nil
}
// LogOptions is the CRI internal type of all log options.
type LogOptions struct {
tail int64
bytes int64
since time.Time
follow bool
timestamp bool
}
// NewLogOptions converts v1.PodLogOptions to the CRI internal LogOptions.
func NewLogOptions(apiOpts *v1.PodLogOptions, now time.Time) *LogOptions {
opts := &LogOptions{
tail: -1, // -1 by default which means read all logs.
bytes: -1, // -1 by default which means read all logs.
follow: apiOpts.Follow,
timestamp: apiOpts.Timestamps,
}
if apiOpts.TailLines != nil {
opts.tail = *apiOpts.TailLines
}
if apiOpts.LimitBytes != nil {
opts.bytes = *apiOpts.LimitBytes
}
if apiOpts.SinceSeconds != nil {
opts.since = now.Add(-time.Duration(*apiOpts.SinceSeconds) * time.Second)
}
if apiOpts.SinceTime != nil && apiOpts.SinceTime.After(opts.since) {
opts.since = apiOpts.SinceTime.Time
}
return opts
}
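// Illustrative sketch, not part of the original file: given
//
//	tailLines := int64(100)
//	sinceSeconds := int64(60)
//	apiOpts := &v1.PodLogOptions{TailLines: &tailLines, SinceSeconds: &sinceSeconds}
//
// NewLogOptions(apiOpts, now) would return
// &LogOptions{tail: 100, bytes: -1, since: now.Add(-60 * time.Second)}.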
// parseFunc is a function parsing one log line to the internal log type.
// Notice that the caller must make sure logMessage is not nil.
type parseFunc func([]byte, *logMessage) error
var parseFuncs = []parseFunc{
parseCRILog, // CRI log format parse function
parseDockerJSONLog, // Docker JSON log format parse function
}
// parseCRILog parses logs in CRI log format. CRI Log format example:
// 2016-10-06T00:17:09.669794202Z stdout P log content 1
// 2016-10-06T00:17:09.669794203Z stderr F log content 2
func parseCRILog(log []byte, msg *logMessage) error {
var err error
// Parse timestamp
idx := bytes.Index(log, delimiter)
if idx < 0 {
return fmt.Errorf("timestamp is not found")
}
msg.timestamp, err = time.Parse(timeFormat, string(log[:idx]))
if err != nil {
return fmt.Errorf("unexpected timestamp format %q: %v", timeFormat, err)
}
// Parse stream type
log = log[idx+1:]
idx = bytes.Index(log, delimiter)
if idx < 0 {
return fmt.Errorf("stream type is not found")
}
msg.stream = runtimeapi.LogStreamType(log[:idx])
if msg.stream != runtimeapi.Stdout && msg.stream != runtimeapi.Stderr {
return fmt.Errorf("unexpected stream type %q", msg.stream)
}
// Parse log tag
log = log[idx+1:]
idx = bytes.Index(log, delimiter)
if idx < 0 {
return fmt.Errorf("log tag is not found")
}
// Keep this forward compatible.
tags := bytes.Split(log[:idx], tagDelimiter)
partial := (runtimeapi.LogTag(tags[0]) == runtimeapi.LogTagPartial)
// Trim the trailing newline if this is a partial line.
if partial && len(log) > 0 && log[len(log)-1] == '\n' {
log = log[:len(log)-1]
}
// Get log content
msg.log = log[idx+1:]
return nil
}
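// Illustrative sketch, not part of the original file: parsing the line
//
//	"2016-10-06T00:17:09.669794202Z stdout P log content 1\n"
//
// yields the RFC3339Nano timestamp, stream "stdout", and log "log content 1"
// with the trailing newline trimmed, because the "P" tag marks a partial
// line; an "F" (full) tag would leave the newline in place.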
// parseDockerJSONLog parses logs in Docker JSON log format. Docker JSON log format
// example:
// {"log":"content 1","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}
// {"log":"content 2","stream":"stderr","time":"2016-10-20T18:39:20.57606444Z"}
func parseDockerJSONLog(log []byte, msg *logMessage) error {
var l = &jsonlog.JSONLog{}
l.Reset()
// TODO: JSON decoding is fairly expensive, we should evaluate this.
if err := json.Unmarshal(log, l); err != nil {
return fmt.Errorf("failed with %v to unmarshal log %q", err, l)
}
msg.timestamp = l.Created
msg.stream = runtimeapi.LogStreamType(l.Stream)
msg.log = []byte(l.Log)
return nil
}
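// Illustrative sketch, not part of the original file: parsing the line
//
//	{"log":"content 1\n","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}
//
// yields log "content 1\n" taken verbatim from the decoded Log field,
// stream "stdout", and the decoded Created timestamp.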
// getParseFunc returns proper parse function based on the sample log line passed in.
func getParseFunc(log []byte) (parseFunc, error) {
for _, p := range parseFuncs {
if err := p(log, &logMessage{}); err == nil {
return p, nil
}
}
return nil, fmt.Errorf("unsupported log format: %q", log)
}
// logWriter controls the writing into the stream based on the log options.
type logWriter struct {
stdout io.Writer
stderr io.Writer
opts *LogOptions
remain int64
}
// errMaximumWrite is returned when all bytes have been written.
var errMaximumWrite = errors.New("maximum write")
// errShortWrite is returned when the message is not fully written.
var errShortWrite = errors.New("short write")
func newLogWriter(stdout io.Writer, stderr io.Writer, opts *LogOptions) *logWriter {
w := &logWriter{
stdout: stdout,
stderr: stderr,
opts: opts,
remain: math.MaxInt64, // initialize it as infinity
}
if opts.bytes >= 0 {
w.remain = opts.bytes
}
return w
}
// write writes the log message into the corresponding stream.
func (w *logWriter) write(msg *logMessage) error {
if msg.timestamp.Before(w.opts.since) {
// Skip the line because it's older than since
return nil
}
line := msg.log
if w.opts.timestamp {
prefix := append([]byte(msg.timestamp.Format(timeFormat)), delimiter[0])
line = append(prefix, line...)
}
// If the line is longer than the remaining bytes, cut it.
if int64(len(line)) > w.remain {
line = line[:w.remain]
}
// Get the proper stream to write to.
var stream io.Writer
switch msg.stream {
case runtimeapi.Stdout:
stream = w.stdout
case runtimeapi.Stderr:
stream = w.stderr
default:
return fmt.Errorf("unexpected stream type %q", msg.stream)
}
n, err := stream.Write(line)
w.remain -= int64(n)
if err != nil {
return err
}
// If the line has not been fully written, return errShortWrite
if n < len(line) {
return errShortWrite
}
// If there are no more bytes left, return errMaximumWrite
if w.remain <= 0 {
return errMaximumWrite
}
return nil
}
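// Illustrative sketch, not part of the original file: with opts.bytes = 10
// and two stdout messages of "abcdefg\n" (8 bytes each), the first write
// emits all 8 bytes and leaves remain = 2; the second is cut to "ab" and
// returns errMaximumWrite, signaling the caller to stop reading.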
// ReadLogs reads the container log and redirects it into stdout and stderr.
// Note that containerID is only needed when following the log; otherwise,
// just pass in an empty string "".
func ReadLogs(path, containerID string, opts *LogOptions, runtimeService internalapi.RuntimeService, stdout, stderr io.Writer) error {
f, err := os.Open(path)
if err != nil {
return fmt.Errorf("failed to open log file %q: %v", path, err)
}
defer f.Close()
// Search start point based on tail line.
start, err := tail.FindTailLineStartIndex(f, opts.tail)
if err != nil {
return fmt.Errorf("failed to tail %d lines of log file %q: %v", opts.tail, path, err)
}
if _, err := f.Seek(start, os.SEEK_SET); err != nil {
return fmt.Errorf("failed to seek %d in log file %q: %v", start, path, err)
}
// Start parsing the logs.
r := bufio.NewReader(f)
// Do not create watcher here because it is not needed if `Follow` is false.
var watcher *fsnotify.Watcher
var parse parseFunc
var stop bool
writer := newLogWriter(stdout, stderr, opts)
msg := &logMessage{}
for {
if stop {
glog.V(2).Infof("Finish parsing log file %q", path)
return nil
}
l, err := r.ReadBytes(eol[0])
if err != nil {
if err != io.EOF { // This is a real error
return fmt.Errorf("failed to read log file %q: %v", path, err)
}
if opts.follow {
// Reset seek so that if this is an incomplete line,
// it will be read again.
if _, err := f.Seek(-int64(len(l)), os.SEEK_CUR); err != nil {
return fmt.Errorf("failed to reset seek in log file %q: %v", path, err)
}
if watcher == nil {
// Initialize the watcher if it has not been initialized yet.
if watcher, err = fsnotify.NewWatcher(); err != nil {
return fmt.Errorf("failed to create fsnotify watcher: %v", err)
}
defer watcher.Close()
if err := watcher.Add(f.Name()); err != nil {
return fmt.Errorf("failed to watch file %q: %v", f.Name(), err)
}
}
// Wait until the next log change.
if found, err := waitLogs(containerID, watcher, runtimeService); !found {
return err
}
continue
}
// Should stop after writing the remaining content.
stop = true
if len(l) == 0 {
continue
}
glog.Warningf("Incomplete line in log file %q: %q", path, l)
}
if parse == nil {
// Initialize the log parsing function.
parse, err = getParseFunc(l)
if err != nil {
return fmt.Errorf("failed to get parse function: %v", err)
}
}
// Parse the log line.
msg.reset()
if err := parse(l, msg); err != nil {
glog.Errorf("Failed with err %v when parsing log for log file %q: %q", err, path, l)
continue
}
// Write the log line into the stream.
if err := writer.write(msg); err != nil {
if err == errMaximumWrite {
glog.V(2).Infof("Finish parsing log file %q, hit bytes limit %d(bytes)", path, opts.bytes)
return nil
}
glog.Errorf("Failed with err %v when writing log for log file %q: %+v", err, path, msg)
return err
}
}
}
// waitLogs waits for the next log write. It returns a boolean and an error. The boolean
// indicates whether new logs were found; the error is any error that occurred while waiting for new logs.
func waitLogs(id string, w *fsnotify.Watcher, runtimeService internalapi.RuntimeService) (bool, error) {
errRetry := 5
for {
select {
case e := <-w.Events:
switch e.Op {
case fsnotify.Write:
return true, nil
default:
glog.Errorf("Unexpected fsnotify event: %v, retrying...", e)
}
case err := <-w.Errors:
glog.Errorf("Fsnotify watch error: %v, %d error retries remaining", err, errRetry)
if errRetry == 0 {
return false, err
}
errRetry--
case <-time.After(stateCheckPeriod):
s, err := runtimeService.ContainerStatus(id)
if err != nil {
return false, err
}
// Only keep following the container log while the container is running.
if s.State != runtimeapi.ContainerState_CONTAINER_RUNNING {
glog.Errorf("Container %q is not running (state=%q)", id, s.State)
// Do not return an error, because it is normal for the container to stop
// while we are waiting.
return false, nil
}
}
}
}

260
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/logs/logs_test.go vendored Normal file
View File

@ -0,0 +1,260 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package logs
import (
"bytes"
"testing"
"time"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
)
func TestLogOptions(t *testing.T) {
var (
line = int64(8)
bytes = int64(64)
timestamp = metav1.Now()
sinceseconds = int64(10)
)
for c, test := range []struct {
apiOpts *v1.PodLogOptions
expect *LogOptions
}{
{ // empty options
apiOpts: &v1.PodLogOptions{},
expect: &LogOptions{tail: -1, bytes: -1},
},
{ // test tail lines
apiOpts: &v1.PodLogOptions{TailLines: &line},
expect: &LogOptions{tail: line, bytes: -1},
},
{ // test limit bytes
apiOpts: &v1.PodLogOptions{LimitBytes: &bytes},
expect: &LogOptions{tail: -1, bytes: bytes},
},
{ // test since timestamp
apiOpts: &v1.PodLogOptions{SinceTime: &timestamp},
expect: &LogOptions{tail: -1, bytes: -1, since: timestamp.Time},
},
{ // test since seconds
apiOpts: &v1.PodLogOptions{SinceSeconds: &sinceseconds},
expect: &LogOptions{tail: -1, bytes: -1, since: timestamp.Add(-10 * time.Second)},
},
} {
t.Logf("TestCase #%d: %+v", c, test)
opts := NewLogOptions(test.apiOpts, timestamp.Time)
assert.Equal(t, test.expect, opts)
}
}
func TestParseLog(t *testing.T) {
timestamp, err := time.Parse(timeFormat, "2016-10-20T18:39:20.57606443Z")
assert.NoError(t, err)
msg := &logMessage{}
for c, test := range []struct {
line string
msg *logMessage
err bool
}{
{ // Docker log format stdout
line: `{"log":"docker stdout test log","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}` + "\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stdout,
log: []byte("docker stdout test log"),
},
},
{ // Docker log format stderr
line: `{"log":"docker stderr test log","stream":"stderr","time":"2016-10-20T18:39:20.57606443Z"}` + "\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stderr,
log: []byte("docker stderr test log"),
},
},
{ // CRI log format stdout
line: "2016-10-20T18:39:20.57606443Z stdout F cri stdout test log\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stdout,
log: []byte("cri stdout test log\n"),
},
},
{ // CRI log format stderr
line: "2016-10-20T18:39:20.57606443Z stderr F cri stderr test log\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stderr,
log: []byte("cri stderr test log\n"),
},
},
{ // Unsupported Log format
line: "unsupported log format test log\n",
msg: &logMessage{},
err: true,
},
{ // Partial CRI log line
line: "2016-10-20T18:39:20.57606443Z stdout P cri stdout partial test log\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stdout,
log: []byte("cri stdout partial test log"),
},
},
{ // Partial CRI log line with multiple log tags.
line: "2016-10-20T18:39:20.57606443Z stdout P:TAG1:TAG2 cri stdout partial test log\n",
msg: &logMessage{
timestamp: timestamp,
stream: runtimeapi.Stdout,
log: []byte("cri stdout partial test log"),
},
},
} {
t.Logf("TestCase #%d: %+v", c, test)
parse, err := getParseFunc([]byte(test.line))
if test.err {
assert.Error(t, err)
continue
}
assert.NoError(t, err)
err = parse([]byte(test.line), msg)
assert.NoError(t, err)
assert.Equal(t, test.msg, msg)
}
}
func TestWriteLogs(t *testing.T) {
timestamp := time.Unix(1234, 4321)
log := "abcdefg\n"
for c, test := range []struct {
stream runtimeapi.LogStreamType
since time.Time
timestamp bool
expectStdout string
expectStderr string
}{
{ // stderr log
stream: runtimeapi.Stderr,
expectStderr: log,
},
{ // stdout log
stream: runtimeapi.Stdout,
expectStdout: log,
},
{ // since is after timestamp
stream: runtimeapi.Stdout,
since: timestamp.Add(1 * time.Second),
},
{ // timestamp enabled
stream: runtimeapi.Stderr,
timestamp: true,
expectStderr: timestamp.Format(timeFormat) + " " + log,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
msg := &logMessage{
timestamp: timestamp,
stream: test.stream,
log: []byte(log),
}
stdoutBuf := bytes.NewBuffer(nil)
stderrBuf := bytes.NewBuffer(nil)
w := newLogWriter(stdoutBuf, stderrBuf, &LogOptions{since: test.since, timestamp: test.timestamp, bytes: -1})
err := w.write(msg)
assert.NoError(t, err)
assert.Equal(t, test.expectStdout, stdoutBuf.String())
assert.Equal(t, test.expectStderr, stderrBuf.String())
}
}
func TestWriteLogsWithBytesLimit(t *testing.T) {
timestamp := time.Unix(1234, 4321)
timestampStr := timestamp.Format(timeFormat)
log := "abcdefg\n"
for c, test := range []struct {
stdoutLines int
stderrLines int
bytes int
timestamp bool
expectStdout string
expectStderr string
}{
{ // limit bytes less than one line
stdoutLines: 3,
bytes: 3,
expectStdout: "abc",
},
{ // limit bytes across lines
stdoutLines: 3,
bytes: len(log) + 3,
expectStdout: "abcdefg\nabc",
},
{ // limit bytes more than all lines
stdoutLines: 3,
bytes: 3 * len(log),
expectStdout: "abcdefg\nabcdefg\nabcdefg\n",
},
{ // limit bytes for stderr
stderrLines: 3,
bytes: len(log) + 3,
expectStderr: "abcdefg\nabc",
},
{ // limit bytes for both stdout and stderr, stdout first.
stdoutLines: 1,
stderrLines: 2,
bytes: len(log) + 3,
expectStdout: "abcdefg\n",
expectStderr: "abc",
},
{ // limit bytes with timestamp
stdoutLines: 3,
timestamp: true,
bytes: len(timestampStr) + 1 + len(log) + 2,
expectStdout: timestampStr + " " + log + timestampStr[:2],
},
} {
t.Logf("TestCase #%d: %+v", c, test)
msg := &logMessage{
timestamp: timestamp,
log: []byte(log),
}
stdoutBuf := bytes.NewBuffer(nil)
stderrBuf := bytes.NewBuffer(nil)
w := newLogWriter(stdoutBuf, stderrBuf, &LogOptions{timestamp: test.timestamp, bytes: int64(test.bytes)})
for i := 0; i < test.stdoutLines; i++ {
msg.stream = runtimeapi.Stdout
if err := w.write(msg); err != nil {
assert.EqualError(t, err, errMaximumWrite.Error())
}
}
for i := 0; i < test.stderrLines; i++ {
msg.stream = runtimeapi.Stderr
if err := w.write(msg); err != nil {
assert.EqualError(t, err, errMaximumWrite.Error())
}
}
assert.Equal(t, test.expectStdout, stdoutBuf.String())
assert.Equal(t, test.expectStderr, stderrBuf.String())
}
}

157
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/security_context.go vendored Normal file
View File

@ -0,0 +1,157 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"fmt"
"k8s.io/api/core/v1"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/security/apparmor"
"k8s.io/kubernetes/pkg/securitycontext"
)
// determineEffectiveSecurityContext gets container's security context from v1.Pod and v1.Container.
func (m *kubeGenericRuntimeManager) determineEffectiveSecurityContext(pod *v1.Pod, container *v1.Container, uid *int64, username string) *runtimeapi.LinuxContainerSecurityContext {
effectiveSc := securitycontext.DetermineEffectiveSecurityContext(pod, container)
synthesized := convertToRuntimeSecurityContext(effectiveSc)
if synthesized == nil {
synthesized = &runtimeapi.LinuxContainerSecurityContext{}
}
// set SeccompProfilePath.
synthesized.SeccompProfilePath = m.getSeccompProfileFromAnnotations(pod.Annotations, container.Name)
// set ApparmorProfile.
synthesized.ApparmorProfile = apparmor.GetProfileNameFromPodAnnotations(pod.Annotations, container.Name)
// set RunAsUser.
if synthesized.RunAsUser == nil {
if uid != nil {
synthesized.RunAsUser = &runtimeapi.Int64Value{Value: *uid}
}
synthesized.RunAsUsername = username
}
// set namespace options and supplemental groups.
synthesized.NamespaceOptions = &runtimeapi.NamespaceOption{
HostNetwork: pod.Spec.HostNetwork,
HostIpc: pod.Spec.HostIPC,
HostPid: pod.Spec.HostPID,
}
podSc := pod.Spec.SecurityContext
if podSc != nil {
if podSc.FSGroup != nil {
synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, int64(*podSc.FSGroup))
}
if podSc.SupplementalGroups != nil {
for _, sg := range podSc.SupplementalGroups {
synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, int64(sg))
}
}
}
if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 {
synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, groups...)
}
synthesized.NoNewPrivs = securitycontext.AddNoNewPrivileges(effectiveSc)
return synthesized
}
// verifyRunAsNonRoot verifies that the container will not run as root when RunAsNonRoot is set.
func verifyRunAsNonRoot(pod *v1.Pod, container *v1.Container, uid *int64, username string) error {
effectiveSc := securitycontext.DetermineEffectiveSecurityContext(pod, container)
// If the option is not set, or if running as root is allowed, return nil.
if effectiveSc == nil || effectiveSc.RunAsNonRoot == nil || !*effectiveSc.RunAsNonRoot {
return nil
}
if effectiveSc.RunAsUser != nil {
if *effectiveSc.RunAsUser == 0 {
return fmt.Errorf("container's runAsUser breaks non-root policy")
}
return nil
}
switch {
case uid != nil && *uid == 0:
return fmt.Errorf("container has runAsNonRoot and image will run as root")
case uid == nil && len(username) > 0:
return fmt.Errorf("container has runAsNonRoot and image has non-numeric user (%s), cannot verify user is non-root", username)
default:
return nil
}
}
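// Illustrative sketch, not part of the original file: with RunAsNonRoot set
// to true, an effective RunAsUser of 0 fails immediately; when no RunAsUser
// is set, a numeric image uid of 0 fails, a non-numeric image user such as
// "nginx" fails because it cannot be verified as non-root, and any non-zero
// numeric uid passes.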
// convertToRuntimeSecurityContext converts v1.SecurityContext to runtimeapi.SecurityContext.
func convertToRuntimeSecurityContext(securityContext *v1.SecurityContext) *runtimeapi.LinuxContainerSecurityContext {
if securityContext == nil {
return nil
}
sc := &runtimeapi.LinuxContainerSecurityContext{
Capabilities: convertToRuntimeCapabilities(securityContext.Capabilities),
SelinuxOptions: convertToRuntimeSELinuxOption(securityContext.SELinuxOptions),
}
if securityContext.RunAsUser != nil {
sc.RunAsUser = &runtimeapi.Int64Value{Value: int64(*securityContext.RunAsUser)}
}
if securityContext.Privileged != nil {
sc.Privileged = *securityContext.Privileged
}
if securityContext.ReadOnlyRootFilesystem != nil {
sc.ReadonlyRootfs = *securityContext.ReadOnlyRootFilesystem
}
return sc
}
// convertToRuntimeSELinuxOption converts v1.SELinuxOptions to runtimeapi.SELinuxOption.
func convertToRuntimeSELinuxOption(opts *v1.SELinuxOptions) *runtimeapi.SELinuxOption {
if opts == nil {
return nil
}
return &runtimeapi.SELinuxOption{
User: opts.User,
Role: opts.Role,
Type: opts.Type,
Level: opts.Level,
}
}
// convertToRuntimeCapabilities converts v1.Capabilities to runtimeapi.Capability.
func convertToRuntimeCapabilities(opts *v1.Capabilities) *runtimeapi.Capability {
if opts == nil {
return nil
}
capabilities := &runtimeapi.Capability{
AddCapabilities: make([]string, len(opts.Add)),
DropCapabilities: make([]string, len(opts.Drop)),
}
for index, value := range opts.Add {
capabilities.AddCapabilities[index] = string(value)
}
for index, value := range opts.Drop {
capabilities.DropCapabilities[index] = string(value)
}
return capabilities
}

116
vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/security_context_test.go vendored Normal file
View File

@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/stretchr/testify/assert"
"testing"
)
func TestVerifyRunAsNonRoot(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
Command: []string{"testCommand"},
WorkingDir: "testWorkingDir",
},
},
},
}
rootUser := int64(0)
runAsNonRootTrue := true
runAsNonRootFalse := false
imageRootUser := int64(0)
imageNonRootUser := int64(123)
for _, test := range []struct {
desc string
sc *v1.SecurityContext
imageUser int64
fail bool
}{
{
desc: "Pass if SecurityContext is not set",
sc: nil,
imageUser: imageRootUser,
fail: false,
},
{
desc: "Pass if RunAsNonRoot is not set",
sc: &v1.SecurityContext{
RunAsUser: &rootUser,
},
imageUser: imageRootUser,
fail: false,
},
{
desc: "Pass if RunAsNonRoot is false (image user is root)",
sc: &v1.SecurityContext{
RunAsNonRoot: &runAsNonRootFalse,
},
imageUser: imageRootUser,
fail: false,
},
{
desc: "Pass if RunAsNonRoot is false (RunAsUser is root)",
sc: &v1.SecurityContext{
RunAsNonRoot: &runAsNonRootFalse,
RunAsUser: &rootUser,
},
imageUser: imageNonRootUser,
fail: false,
},
{
desc: "Fail if container's RunAsUser is root and RunAsNonRoot is true",
sc: &v1.SecurityContext{
RunAsNonRoot: &runAsNonRootTrue,
RunAsUser: &rootUser,
},
imageUser: imageNonRootUser,
fail: true,
},
{
desc: "Fail if image's user is root and RunAsNonRoot is true",
sc: &v1.SecurityContext{
RunAsNonRoot: &runAsNonRootTrue,
},
imageUser: imageRootUser,
fail: true,
},
} {
pod.Spec.Containers[0].SecurityContext = test.sc
// Use the per-case image uid so the imageUser field actually drives the test.
uid := test.imageUser
err := verifyRunAsNonRoot(pod, &pod.Spec.Containers[0], &uid, "")
if test.fail {
assert.Error(t, err, test.desc)
} else {
assert.NoError(t, err, test.desc)
}
}
}