vendor update for CSI 0.3.0

This commit is contained in:
gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6810 changed files with 438061 additions and 193861 deletions

View File

@ -1,9 +1,9 @@
approvers:
- crassirostris
- DirectXMan12
- fabxc
- fgrzadkowski
- piosz
- x13n
- kawych
reviewers:
- sig-instrumentation-pr-reviews

View File

@ -16,7 +16,7 @@ go_library(
"//test/e2e/framework:go_default_library",
"//test/e2e/instrumentation/common:go_default_library",
"//test/e2e/instrumentation/logging/elasticsearch:go_default_library",
"//test/e2e/instrumentation/logging/stackdrvier:go_default_library",
"//test/e2e/instrumentation/logging/stackdriver:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
@ -35,7 +35,7 @@ filegroup(
srcs = [
":package-srcs",
"//test/e2e/instrumentation/logging/elasticsearch:all-srcs",
"//test/e2e/instrumentation/logging/stackdrvier:all-srcs",
"//test/e2e/instrumentation/logging/stackdriver:all-srcs",
"//test/e2e/instrumentation/logging/utils:all-srcs",
],
tags = ["automanaged"],

View File

@ -1,8 +1,6 @@
reviewers:
- coffeepac
- crassirostris
- piosz
approvers:
- coffeepac
- crassirostris
- piosz

View File

@ -18,5 +18,5 @@ package logging
import (
_ "k8s.io/kubernetes/test/e2e/instrumentation/logging/elasticsearch"
_ "k8s.io/kubernetes/test/e2e/instrumentation/logging/stackdrvier"
_ "k8s.io/kubernetes/test/e2e/instrumentation/logging/stackdriver"
)

View File

@ -12,13 +12,12 @@ go_library(
"soak.go",
"utils.go",
],
importpath = "k8s.io/kubernetes/test/e2e/instrumentation/logging/stackdrvier",
importpath = "k8s.io/kubernetes/test/e2e/instrumentation/logging/stackdriver",
deps = [
"//test/e2e/framework:go_default_library",
"//test/e2e/instrumentation/common:go_default_library",
"//test/e2e/instrumentation/logging/utils:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/golang.org/x/oauth2/google:go_default_library",
"//vendor/google.golang.org/api/logging/v2beta1:go_default_library",
"//vendor/google.golang.org/api/pubsub/v1:go_default_library",

View File

@ -162,7 +162,11 @@ var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackd
}()
ginkgo.By("Waiting for events to ingest")
c := utils.NewLogChecker(p, utils.UntilFirstEntry, utils.JustTimeout, "")
location := framework.TestContext.CloudConfig.Zone
if framework.TestContext.CloudConfig.MultiMaster {
location = framework.TestContext.CloudConfig.Region
}
c := utils.NewLogChecker(p, utils.UntilFirstEntryFromLocation(location), utils.JustTimeout, "")
err := utils.WaitForLogs(c, ingestionInterval, ingestionTimeout)
framework.ExpectNoError(err)
})

View File

@ -49,7 +49,7 @@ var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackd
maxPodCount := 10
jobDuration := 30 * time.Minute
linesPerPodPerSecond := 100
// TODO(crassirostris): Increase to 21 hrs
// TODO(instrumentation): Increase to 21 hrs
testDuration := 3 * time.Hour
ingestionInterval := 1 * time.Minute
ingestionTimeout := testDuration + 30*time.Minute

View File

@ -17,6 +17,7 @@ limitations under the License.
package stackdriver
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
@ -27,7 +28,6 @@ import (
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/instrumentation/logging/utils"
"golang.org/x/net/context"
"golang.org/x/oauth2/google"
sd "google.golang.org/api/logging/v2beta1"
pubsub "google.golang.org/api/pubsub/v1"
@ -49,6 +49,9 @@ const (
// The parallelism level of polling logs process.
sdLoggingPollParallelism = 10
// The limit on the number of stackdriver sinks that can be created within one project.
stackdriverSinkCountLimit = 90
)
type logProviderScope int
@ -86,6 +89,10 @@ func newSdLogProvider(f *framework.Framework, scope logProviderScope) (*sdLogPro
if err != nil {
return nil, err
}
err = ensureProjectHasSinkCapacity(sdService.Projects.Sinks, framework.TestContext.CloudConfig.ProjectID)
if err != nil {
return nil, err
}
pubsubService, err := pubsub.New(hc)
if err != nil {
@ -104,6 +111,36 @@ func newSdLogProvider(f *framework.Framework, scope logProviderScope) (*sdLogPro
return provider, nil
}
func ensureProjectHasSinkCapacity(sinksService *sd.ProjectsSinksService, projectID string) error {
listResponse, err := listSinks(sinksService, projectID)
if err != nil {
return err
}
if len(listResponse.Sinks) >= stackdriverSinkCountLimit {
framework.Logf("Reached Stackdriver sink limit. Deleting all sinks")
deleteSinks(sinksService, projectID, listResponse.Sinks)
}
return nil
}
func listSinks(sinksService *sd.ProjectsSinksService, projectID string) (*sd.ListSinksResponse, error) {
projectDst := fmt.Sprintf("projects/%s", projectID)
listResponse, err := sinksService.List(projectDst).PageSize(stackdriverSinkCountLimit).Do()
if err != nil {
return nil, fmt.Errorf("failed to list Stackdriver Logging sinks: %v", err)
}
return listResponse, nil
}
func deleteSinks(sinksService *sd.ProjectsSinksService, projectID string, sinks []*sd.LogSink) {
for _, sink := range sinks {
sinkNameID := fmt.Sprintf("projects/%s/sinks/%s", projectID, sink.Name)
if _, err := sinksService.Delete(sinkNameID).Do(); err != nil {
framework.Logf("Failed to delete LogSink: %v", err)
}
}
}
func (p *sdLogProvider) Init() error {
projectID := framework.TestContext.CloudConfig.ProjectID
nsName := p.framework.Namespace.Name

View File

@ -26,7 +26,7 @@ func GetNodeIds(cs clientset.Interface) []string {
nodes := framework.GetReadySchedulableNodesOrDie(cs)
nodeIds := []string{}
for _, n := range nodes.Items {
nodeIds = append(nodeIds, n.Spec.ExternalID)
nodeIds = append(nodeIds, n.Name)
}
return nodeIds
}

View File

@ -48,9 +48,19 @@ func UntilFirstEntryFromLog(log string) IngestionPred {
return func(_ string, entries []LogEntry) (bool, error) {
for _, e := range entries {
if e.LogName == log {
if e.Location != framework.TestContext.CloudConfig.Zone {
return false, fmt.Errorf("Bad location in logs '%s' != '%d'", e.Location, framework.TestContext.CloudConfig.Zone)
}
return true, nil
}
}
return false, nil
}
}
// UntilFirstEntryFromLocation is a IngestionPred that checks that at least one
// entry from the log with a given name was ingested.
func UntilFirstEntryFromLocation(location string) IngestionPred {
return func(_ string, entries []LogEntry) (bool, error) {
for _, e := range entries {
if e.Location == location {
return true, nil
}
}

View File

@ -14,6 +14,7 @@ go_library(
"custom_metrics_stackdriver.go",
"influxdb.go",
"metrics_grabber.go",
"prometheus.go",
"stackdriver.go",
"stackdriver_metadata_agent.go",
],
@ -28,6 +29,7 @@ go_library(
"//vendor/github.com/influxdata/influxdb/client/v2:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/github.com/prometheus/common/model:go_default_library",
"//vendor/golang.org/x/oauth2/google:go_default_library",
"//vendor/google.golang.org/api/monitoring/v3:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
@ -42,6 +44,7 @@ go_library(
"//vendor/k8s.io/client-go/discovery:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/custom_metrics:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/external_metrics:go_default_library",
],
)

View File

@ -18,6 +18,7 @@ package monitoring
import (
"fmt"
"strings"
gcm "google.golang.org/api/monitoring/v3"
corev1 "k8s.io/api/core/v1"
@ -25,6 +26,7 @@ import (
rbac "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
"os/exec"
)
var (
@ -52,6 +54,11 @@ var (
},
},
}
StagingDeploymentsLocation = "https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/staging/"
AdapterForOldResourceModel = "adapter_old_resource_model.yaml"
AdapterForNewResourceModel = "adapter_new_resource_model.yaml"
AdapterDefault = AdapterForOldResourceModel
ClusterAdminBinding = "e2e-test-cluster-admin-binding"
)
// CustomMetricContainerSpec allows to specify a config for StackdriverExporterDeployment
@ -82,7 +89,7 @@ func SimpleStackdriverExporterDeployment(name, namespace string, replicas int32,
func StackdriverExporterDeployment(name, namespace string, replicas int32, containers []CustomMetricContainerSpec) *extensions.Deployment {
podSpec := corev1.PodSpec{Containers: []corev1.Container{}}
for _, containerSpec := range containers {
podSpec.Containers = append(podSpec.Containers, stackdriverExporterContainerSpec(containerSpec.Name, containerSpec.MetricName, containerSpec.MetricValue))
podSpec.Containers = append(podSpec.Containers, stackdriverExporterContainerSpec(containerSpec.Name, namespace, containerSpec.MetricName, containerSpec.MetricValue))
}
return &extensions.Deployment{
@ -119,17 +126,30 @@ func StackdriverExporterPod(podName, namespace, podLabel, metricName string, met
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{stackdriverExporterContainerSpec(StackdriverExporter, metricName, metricValue)},
Containers: []corev1.Container{stackdriverExporterContainerSpec(StackdriverExporter, namespace, metricName, metricValue)},
},
}
}
func stackdriverExporterContainerSpec(name string, metricName string, metricValue int64) corev1.Container {
func stackdriverExporterContainerSpec(name string, namespace string, metricName string, metricValue int64) corev1.Container {
return corev1.Container{
Name: name,
Image: "k8s.gcr.io/sd-dummy-exporter:v0.1.0",
Image: "k8s.gcr.io/sd-dummy-exporter:v0.2.0",
ImagePullPolicy: corev1.PullPolicy("Always"),
Command: []string{"/sd_dummy_exporter", "--pod-id=$(POD_ID)", "--metric-name=" + metricName, fmt.Sprintf("--metric-value=%v", metricValue)},
Command: []string{
"/bin/sh",
"-c",
strings.Join([]string{
"./sd_dummy_exporter",
"--pod-id=$(POD_ID)",
"--pod-name=$(POD_NAME)",
"--namespace=" + namespace,
"--metric-name=" + metricName,
fmt.Sprintf("--metric-value=%v", metricValue),
"--use-old-resource-model",
"--use-new-resource-model",
}, " "),
},
Env: []corev1.EnvVar{
{
Name: "POD_ID",
@ -139,6 +159,14 @@ func stackdriverExporterContainerSpec(name string, metricName string, metricValu
},
},
},
{
Name: "POD_NAME",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.name",
},
},
},
},
Ports: []corev1.ContainerPort{{ContainerPort: 80}},
}
@ -210,9 +238,35 @@ func prometheusExporterPodSpec(metricName string, metricValue int64, port int32)
}
}
// CreateAdapter creates Custom Metrics - Stackdriver adapter.
func CreateAdapter() error {
stat, err := framework.RunKubectl("create", "-f", "https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/adapter-beta.yaml")
// CreateAdapter creates Custom Metrics - Stackdriver adapter
// adapterDeploymentFile should be a filename for adapter deployment located in StagingDeploymentLocation
func CreateAdapter(adapterDeploymentFile string) error {
// A workaround to make the work on GKE. GKE doesn't normally allow to create cluster roles,
// which the adapter deployment does. The solution is to create cluster role binding for
// cluster-admin role and currently used service account.
err := createClusterAdminBinding()
if err != nil {
return err
}
adapterURL := StagingDeploymentsLocation + adapterDeploymentFile
err = exec.Command("wget", adapterURL).Run()
if err != nil {
return err
}
stat, err := framework.RunKubectl("create", "-f", adapterURL)
framework.Logf(stat)
return err
}
func createClusterAdminBinding() error {
stdout, stderr, err := framework.RunCmd("gcloud", "config", "get-value", "core/account")
if err != nil {
framework.Logf(stderr)
return err
}
serviceAccount := strings.TrimSpace(stdout)
framework.Logf("current service account: %q", serviceAccount)
stat, err := framework.RunKubectl("create", "clusterrolebinding", ClusterAdminBinding, "--clusterrole=cluster-admin", "--user="+serviceAccount)
framework.Logf(stat)
return err
}
@ -251,8 +305,23 @@ func CleanupDescriptors(service *gcm.Service, projectId string) {
}
// CleanupAdapter deletes Custom Metrics - Stackdriver adapter deployments.
func CleanupAdapter() error {
stat, err := framework.RunKubectl("delete", "-f", "https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/adapter-beta.yaml")
func CleanupAdapter(adapterDeploymentFile string) {
stat, err := framework.RunKubectl("delete", "-f", adapterDeploymentFile)
framework.Logf(stat)
return err
if err != nil {
framework.Logf("Failed to delete adapter deployments: %s", err)
}
err = exec.Command("rm", adapterDeploymentFile).Run()
if err != nil {
framework.Logf("Failed to delete adapter deployment file: %s", err)
}
cleanupClusterAdminBinding()
}
func cleanupClusterAdminBinding() {
stat, err := framework.RunKubectl("delete", "clusterrolebinding", ClusterAdminBinding)
framework.Logf(stat)
if err != nil {
framework.Logf("Failed to delete cluster admin binding: %s", err)
}
}

View File

@ -28,12 +28,14 @@ import (
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
gcm "google.golang.org/api/monitoring/v3"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/client-go/discovery"
"k8s.io/kubernetes/test/e2e/framework"
customclient "k8s.io/metrics/pkg/client/custom_metrics"
externalclient "k8s.io/metrics/pkg/client/external_metrics"
)
const (
@ -48,41 +50,46 @@ var _ = instrumentation.SIGDescribe("Stackdriver Monitoring", func() {
})
f := framework.NewDefaultFramework("stackdriver-monitoring")
var kubeClient clientset.Interface
var customMetricsClient customclient.CustomMetricsClient
var discoveryClient *discovery.DiscoveryClient
It("should run Custom Metrics - Stackdriver Adapter [Feature:StackdriverCustomMetrics]", func() {
kubeClient = f.ClientSet
It("should run Custom Metrics - Stackdriver Adapter for old resource model [Feature:StackdriverCustomMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
customMetricsClient = customclient.NewForConfigOrDie(config)
discoveryClient = discovery.NewDiscoveryClientForConfigOrDie(config)
testAdapter(f, kubeClient, customMetricsClient, discoveryClient)
customMetricsClient := customclient.NewForConfigOrDie(config)
discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(config)
testCustomMetrics(f, kubeClient, customMetricsClient, discoveryClient, AdapterForOldResourceModel)
})
It("should run Custom Metrics - Stackdriver Adapter for new resource model [Feature:StackdriverCustomMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
customMetricsClient := customclient.NewForConfigOrDie(config)
discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(config)
testCustomMetrics(f, kubeClient, customMetricsClient, discoveryClient, AdapterForNewResourceModel)
})
It("should run Custom Metrics - Stackdriver Adapter for external metrics [Feature:StackdriverExternalMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
externalMetricsClient := externalclient.NewForConfigOrDie(config)
testExternalMetrics(f, kubeClient, externalMetricsClient)
})
})
func testAdapter(f *framework.Framework, kubeClient clientset.Interface, customMetricsClient customclient.CustomMetricsClient, discoveryClient *discovery.DiscoveryClient) {
func testCustomMetrics(f *framework.Framework, kubeClient clientset.Interface, customMetricsClient customclient.CustomMetricsClient, discoveryClient *discovery.DiscoveryClient, adapterDeployment string) {
projectId := framework.TestContext.CloudConfig.ProjectID
ctx := context.Background()
client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
// Hack for running tests locally, needed to authenticate in Stackdriver
// If this is your use case, create application default credentials:
// $ gcloud auth application-default login
// and uncomment following lines (comment out the two lines above):
/*
ts, err := google.DefaultTokenSource(oauth2.NoContext)
framework.Logf("Couldn't get application default credentials, %v", err)
if err != nil {
framework.Failf("Error accessing application default credentials, %v", err)
}
client := oauth2.NewClient(oauth2.NoContext, ts)
*/
gcmService, err := gcm.New(client)
if err != nil {
framework.Failf("Failed to create gcm service, %v", err)
@ -95,17 +102,17 @@ func testAdapter(f *framework.Framework, kubeClient clientset.Interface, customM
}
defer CleanupDescriptors(gcmService, projectId)
err = CreateAdapter()
err = CreateAdapter(adapterDeployment)
if err != nil {
framework.Failf("Failed to set up: %s", err)
}
defer CleanupAdapter()
defer CleanupAdapter(adapterDeployment)
_, err = kubeClient.RbacV1().ClusterRoleBindings().Create(HPAPermissions)
defer kubeClient.RbacV1().ClusterRoleBindings().Delete("custom-metrics-reader", &metav1.DeleteOptions{})
// Run application that exports the metric
err = createSDExporterPods(f, kubeClient)
_, err = createSDExporterPods(f, kubeClient)
if err != nil {
framework.Failf("Failed to create stackdriver-exporter pod: %s", err)
}
@ -116,25 +123,62 @@ func testAdapter(f *framework.Framework, kubeClient clientset.Interface, customM
// i.e. pod creation, first time series exported
time.Sleep(60 * time.Second)
// Verify responses from Custom Metrics API
verifyResponsesFromCustomMetricsAPI(f, customMetricsClient, discoveryClient)
}
// TODO(kawych): migrate this test to new resource model
func testExternalMetrics(f *framework.Framework, kubeClient clientset.Interface, externalMetricsClient externalclient.ExternalMetricsClient) {
projectId := framework.TestContext.CloudConfig.ProjectID
ctx := context.Background()
client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
gcmService, err := gcm.New(client)
if err != nil {
framework.Failf("Failed to create gcm service, %v", err)
}
// Set up a cluster: create a custom metric and set up k8s-sd adapter
err = CreateDescriptors(gcmService, projectId)
if err != nil {
framework.Failf("Failed to create metric descriptor: %s", err)
}
defer CleanupDescriptors(gcmService, projectId)
// Both deployments - for old and new resource model - expose External Metrics API.
err = CreateAdapter(AdapterForOldResourceModel)
if err != nil {
framework.Failf("Failed to set up: %s", err)
}
defer CleanupAdapter(AdapterForOldResourceModel)
_, err = kubeClient.RbacV1().ClusterRoleBindings().Create(HPAPermissions)
defer kubeClient.RbacV1().ClusterRoleBindings().Delete("custom-metrics-reader", &metav1.DeleteOptions{})
// Run application that exports the metric
pod, err := createSDExporterPods(f, kubeClient)
if err != nil {
framework.Failf("Failed to create stackdriver-exporter pod: %s", err)
}
defer cleanupSDExporterPod(f, kubeClient)
// Wait a short amount of time to create a pod and export some metrics
// TODO: add some events to wait for instead of fixed amount of time
// i.e. pod creation, first time series exported
time.Sleep(60 * time.Second)
verifyResponseFromExternalMetricsAPI(f, externalMetricsClient, pod)
}
func verifyResponsesFromCustomMetricsAPI(f *framework.Framework, customMetricsClient customclient.CustomMetricsClient, discoveryClient *discovery.DiscoveryClient) {
resources, err := discoveryClient.ServerResourcesForGroupVersion("custom.metrics.k8s.io/v1beta1")
if err != nil {
framework.Failf("Failed to retrieve a list of supported metrics: %s", err)
}
gotCustomMetric, gotUnusedMetric := false, false
for _, resource := range resources.APIResources {
if resource.Name == "pods/"+CustomMetricName {
gotCustomMetric = true
} else if resource.Name == "pods/"+UnusedMetricName {
gotUnusedMetric = true
} else {
framework.Failf("Unexpected metric %s. Only metric %s should be supported", resource.Name, CustomMetricName)
}
}
if !gotCustomMetric {
if !containsResource(resources.APIResources, "*/custom.googleapis.com|"+CustomMetricName) {
framework.Failf("Metric '%s' expected but not received", CustomMetricName)
}
if !gotUnusedMetric {
if !containsResource(resources.APIResources, "*/custom.googleapis.com|"+UnusedMetricName) {
framework.Failf("Metric '%s' expected but not received", UnusedMetricName)
}
value, err := customMetricsClient.NamespacedMetrics(f.Namespace.Name).GetForObject(schema.GroupKind{Group: "", Kind: "Pod"}, stackdriverExporterPod1, CustomMetricName)
@ -160,6 +204,40 @@ func testAdapter(f *framework.Framework, kubeClient clientset.Interface, customM
}
}
func containsResource(resourcesList []metav1.APIResource, resourceName string) bool {
for _, resource := range resourcesList {
if resource.Name == resourceName {
return true
}
}
return false
}
func verifyResponseFromExternalMetricsAPI(f *framework.Framework, externalMetricsClient externalclient.ExternalMetricsClient, pod *v1.Pod) {
req1, _ := labels.NewRequirement("resource.type", selection.Equals, []string{"gke_container"})
// It's important to filter out only metrics from the right namespace, since multiple e2e tests
// may run in the same project concurrently. "dummy" is added to test
req2, _ := labels.NewRequirement("resource.labels.pod_id", selection.In, []string{string(pod.UID), "dummy"})
req3, _ := labels.NewRequirement("resource.labels.namespace_id", selection.Exists, []string{})
req4, _ := labels.NewRequirement("resource.labels.zone", selection.NotEquals, []string{"dummy"})
req5, _ := labels.NewRequirement("resource.labels.cluster_name", selection.NotIn, []string{"foo", "bar"})
values, err := externalMetricsClient.
NamespacedMetrics("dummy").
List("custom.googleapis.com|"+CustomMetricName, labels.NewSelector().Add(*req1, *req2, *req3, *req4, *req5))
if err != nil {
framework.Failf("Failed query: %s", err)
}
if len(values.Items) != 1 {
framework.Failf("Expected exactly one external metric value, but % values received", len(values.Items))
}
if values.Items[0].MetricName != "custom.googleapis.com|"+CustomMetricName ||
values.Items[0].Value.Value() != CustomMetricValue ||
// Check one label just to make sure labels are included
values.Items[0].MetricLabels["resource.labels.pod_id"] != string(pod.UID) {
framework.Failf("Unexpected result for metric %s: %v", CustomMetricName, values.Items[0])
}
}
func cleanupSDExporterPod(f *framework.Framework, cs clientset.Interface) {
err := cs.CoreV1().Pods(f.Namespace.Name).Delete(stackdriverExporterPod1, &metav1.DeleteOptions{})
if err != nil {
@ -171,11 +249,11 @@ func cleanupSDExporterPod(f *framework.Framework, cs clientset.Interface) {
}
}
func createSDExporterPods(f *framework.Framework, cs clientset.Interface) error {
_, err := cs.CoreV1().Pods(f.Namespace.Name).Create(StackdriverExporterPod(stackdriverExporterPod1, f.Namespace.Name, stackdriverExporterLabel, CustomMetricName, CustomMetricValue))
func createSDExporterPods(f *framework.Framework, cs clientset.Interface) (*v1.Pod, error) {
pod, err := cs.CoreV1().Pods(f.Namespace.Name).Create(StackdriverExporterPod(stackdriverExporterPod1, f.Namespace.Name, stackdriverExporterLabel, CustomMetricName, CustomMetricValue))
if err != nil {
return err
return nil, err
}
_, err = cs.CoreV1().Pods(f.Namespace.Name).Create(StackdriverExporterPod(stackdriverExporterPod2, f.Namespace.Name, stackdriverExporterLabel, UnusedMetricName, UnusedMetricValue))
return err
return pod, err
}

View File

@ -42,7 +42,7 @@ var _ = instrumentation.SIGDescribe("Monitoring", func() {
framework.SkipUnlessClusterMonitoringModeIs("influxdb")
})
It("should verify monitoring pods and all cluster nodes are available on influxdb using heapster.", func() {
It("should verify monitoring pods and all cluster nodes are available on influxdb using heapster [Feature:InfluxdbMonitoring]", func() {
testMonitoringUsingHeapsterInfluxdb(f.ClientSet)
})
})

View File

@ -0,0 +1,382 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/prometheus/common/model"
. "github.com/onsi/ginkgo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
)
const (
prometheusQueryStep = time.Minute
prometheusMetricErrorTolerance = 0.25
prometheusMetricValidationDuration = time.Minute * 2
prometheusRate = time.Minute * 2
prometheusRequiredNodesUpDuration = time.Minute * 5
prometheusService = "prometheus"
prometheusSleepBetweenAttempts = time.Second * 30
prometheusTestTimeout = time.Minute * 5
customMetricValue = 1000
targetCPUUsage = 0.1
)
var _ = instrumentation.SIGDescribe("[Feature:PrometheusMonitoring] Prometheus", func() {
BeforeEach(func() {
framework.SkipUnlessPrometheusMonitoringIsEnabled()
})
f := framework.NewDefaultFramework("prometheus-monitoring")
It("should scrape container metrics from all nodes.", func() {
expectedNodes, err := getAllNodes(f.ClientSet)
framework.ExpectNoError(err)
retryUntilSucceeds(func() error {
return validateMetricAvailableForAllNodes(f.ClientSet, `container_cpu_usage_seconds_total`, expectedNodes)
}, prometheusTestTimeout)
})
It("should successfully scrape all targets", func() {
retryUntilSucceeds(func() error {
return validateAllActiveTargetsAreHealthy(f.ClientSet)
}, prometheusTestTimeout)
})
It("should contain correct container CPU metric.", func() {
query := prometheusCPUQuery(f.Namespace.Name, "prometheus-cpu-consumer", prometheusRate)
consumer := consumeCPUResources(f, "prometheus-cpu-consumer", targetCPUUsage*1000)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, targetCPUUsage, 3, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
It("should scrape metrics from annotated pods.", func() {
query := prometheusPodCustomMetricQuery(f.Namespace.Name, "prometheus-custom-pod-metric")
consumer := exportCustomMetricFromPod(f, "prometheus-custom-pod-metric", customMetricValue)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
It("should scrape metrics from annotated services.", func() {
query := prometheusServiceCustomMetricQuery(f.Namespace.Name, "prometheus-custom-service-metric")
consumer := exportCustomMetricFromService(f, "prometheus-custom-service-metric", customMetricValue)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
})
func prometheusCPUQuery(namespace, podNamePrefix string, rate time.Duration) string {
return fmt.Sprintf(`sum(irate(container_cpu_usage_seconds_total{namespace="%v",pod_name=~"%v.*",image!=""}[%vm]))`,
namespace, podNamePrefix, int64(rate.Minutes()))
}
func prometheusServiceCustomMetricQuery(namespace, service string) string {
return fmt.Sprintf(`sum(QPS{kubernetes_namespace="%v",kubernetes_name="%v"})`, namespace, service)
}
func prometheusPodCustomMetricQuery(namespace, podNamePrefix string) string {
return fmt.Sprintf(`sum(QPS{kubernetes_namespace="%s",kubernetes_pod_name=~"%s.*"})`, namespace, podNamePrefix)
}
func consumeCPUResources(f *framework.Framework, consumerName string, cpuUsage int) *common.ResourceConsumer {
return common.NewDynamicResourceConsumer(consumerName, f.Namespace.Name, common.KindDeployment, 1, cpuUsage,
memoryUsed, 0, int64(cpuUsage), memoryLimit, f.ClientSet, f.InternalClientset, f.ScalesGetter)
}
func exportCustomMetricFromPod(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
podAnnotations := map[string]string{
"prometheus.io/scrape": "true",
"prometheus.io/path": "/Metrics",
"prometheus.io/port": "8080",
}
return common.NewMetricExporter(consumerName, f.Namespace.Name, podAnnotations, nil, metricValue, f.ClientSet, f.InternalClientset, f.ScalesGetter)
}
func exportCustomMetricFromService(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
serviceAnnotations := map[string]string{
"prometheus.io/scrape": "true",
"prometheus.io/path": "/Metrics",
"prometheus.io/port": "8080",
}
return common.NewMetricExporter(consumerName, f.Namespace.Name, nil, serviceAnnotations, metricValue, f.ClientSet, f.InternalClientset, f.ScalesGetter)
}
func validateMetricAvailableForAllNodes(c clientset.Interface, metric string, expectedNodesNames []string) error {
instanceLabels, err := getInstanceLabelsAvailableForMetric(c, prometheusRequiredNodesUpDuration, metric)
if err != nil {
return err
}
nodesWithMetric := make(map[string]bool)
for _, instance := range instanceLabels {
nodesWithMetric[instance] = true
}
missedNodesCount := 0
for _, nodeName := range expectedNodesNames {
if _, found := nodesWithMetric[nodeName]; !found {
missedNodesCount++
}
}
if missedNodesCount > 0 {
return fmt.Errorf("Metric not found for %v out of %v nodes", missedNodesCount, len(expectedNodesNames))
}
return nil
}
func validateAllActiveTargetsAreHealthy(c clientset.Interface) error {
discovery, err := fetchPrometheusTargetDiscovery(c)
if err != nil {
return err
}
if len(discovery.ActiveTargets) == 0 {
return fmt.Errorf("Prometheus is not scraping any targets, at least one target is required")
}
for _, target := range discovery.ActiveTargets {
if target.Health != HealthGood {
return fmt.Errorf("Target health not good. Target: %v", target)
}
}
return nil
}
func validateQueryReturnsCorrectValues(c clientset.Interface, query string, expectedValue float64, minSamplesCount int, errorTolerance float64) error {
samples, err := fetchQueryValues(c, query, prometheusMetricValidationDuration)
if err != nil {
return err
}
if len(samples) < minSamplesCount {
return fmt.Errorf("Not enough samples for query '%v', got %v", query, samples)
}
framework.Logf("Executed query '%v' returned %v", query, samples)
for _, value := range samples {
error := math.Abs(value-expectedValue) / expectedValue
if error >= errorTolerance {
return fmt.Errorf("Query result values outside expected value tolerance. Expected error below %v, got %v", errorTolerance, error)
}
}
return nil
}
func fetchQueryValues(c clientset.Interface, query string, duration time.Duration) ([]float64, error) {
now := time.Now()
response, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
if err != nil {
return nil, err
}
m, ok := response.(model.Matrix)
if !ok {
return nil, fmt.Errorf("Expected matric response, got: %T", response)
}
values := make([]float64, 0)
for _, stream := range m {
for _, sample := range stream.Values {
values = append(values, float64(sample.Value))
}
}
return values, nil
}
func getInstanceLabelsAvailableForMetric(c clientset.Interface, duration time.Duration, metric string) ([]string, error) {
var instance model.LabelValue
now := time.Now()
query := fmt.Sprintf(`sum(%v)by(instance)`, metric)
result, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
if err != nil {
return nil, err
}
instanceLabels := make([]string, 0)
m, ok := result.(model.Matrix)
if !ok {
framework.Failf("Expected matrix response for query '%v', got: %T", query, result)
return instanceLabels, nil
}
for _, stream := range m {
if instance, ok = stream.Metric["instance"]; !ok {
continue
}
instanceLabels = append(instanceLabels, string(instance))
}
return instanceLabels, nil
}
func fetchPrometheusTargetDiscovery(c clientset.Interface) (TargetDiscovery, error) {
ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
defer cancel()
response, err := c.CoreV1().RESTClient().Get().
Context(ctx).
Namespace("kube-system").
Resource("services").
Name(prometheusService+":9090").
SubResource("proxy").
Suffix("api", "v1", "targets").
Do().
Raw()
var qres promTargetsResponse
if err != nil {
fmt.Printf(string(response))
return qres.Data, err
}
err = json.Unmarshal(response, &qres)
return qres.Data, nil
}
type promTargetsResponse struct {
Status string `json:"status"`
Data TargetDiscovery `json:"data"`
}
type TargetDiscovery struct {
ActiveTargets []*Target `json:"activeTargets"`
DroppedTargets []*DroppedTarget `json:"droppedTargets"`
}
type Target struct {
DiscoveredLabels map[string]string `json:"discoveredLabels"`
Labels map[string]string `json:"labels"`
ScrapeURL string `json:"scrapeUrl"`
LastError string `json:"lastError"`
LastScrape time.Time `json:"lastScrape"`
Health TargetHealth `json:"health"`
}
type DroppedTarget struct {
// Labels before any processing.
DiscoveredLabels map[string]string `json:"discoveredLabels"`
}
const (
HealthUnknown TargetHealth = "unknown"
HealthGood TargetHealth = "up"
HealthBad TargetHealth = "down"
)
type TargetHealth string
func queryPrometheus(c clientset.Interface, query string, start, end time.Time, step time.Duration) (model.Value, error) {
ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
defer cancel()
response, err := c.CoreV1().RESTClient().Get().
Context(ctx).
Namespace("kube-system").
Resource("services").
Name(prometheusService+":9090").
SubResource("proxy").
Suffix("api", "v1", "query_range").
Param("query", query).
Param("start", fmt.Sprintf("%v", start.Unix())).
Param("end", fmt.Sprintf("%v", end.Unix())).
Param("step", fmt.Sprintf("%vs", step.Seconds())).
Do().
Raw()
if err != nil {
fmt.Printf(string(response))
return nil, err
}
var qres promQueryResponse
err = json.Unmarshal(response, &qres)
return model.Value(qres.Data.v), err
}
type promQueryResponse struct {
Status string `json:"status"`
Data responseData `json:"data"`
}
type responseData struct {
Type model.ValueType `json:"resultType"`
Result interface{} `json:"result"`
// The decoded value.
v model.Value
}
func (qr *responseData) UnmarshalJSON(b []byte) error {
v := struct {
Type model.ValueType `json:"resultType"`
Result json.RawMessage `json:"result"`
}{}
err := json.Unmarshal(b, &v)
if err != nil {
return err
}
switch v.Type {
case model.ValScalar:
var sv model.Scalar
err = json.Unmarshal(v.Result, &sv)
qr.v = &sv
case model.ValVector:
var vv model.Vector
err = json.Unmarshal(v.Result, &vv)
qr.v = vv
case model.ValMatrix:
var mv model.Matrix
err = json.Unmarshal(v.Result, &mv)
qr.v = mv
default:
err = fmt.Errorf("unexpected value type %q", v.Type)
}
return err
}
func retryUntilSucceeds(validator func() error, timeout time.Duration) {
startTime := time.Now()
var err error
for {
err = validator()
if err == nil {
return
}
if time.Since(startTime) >= timeout {
break
}
framework.Logf(err.Error())
time.Sleep(prometheusSleepBetweenAttempts)
}
framework.Failf(err.Error())
}
func getAllNodes(c clientset.Interface) ([]string, error) {
nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
if err != nil {
return nil, err
}
result := []string{}
for _, node := range nodeList.Items {
result = append(result, node.Name)
}
return result, nil
}

View File

@ -101,7 +101,7 @@ func testStackdriverMonitoring(f *framework.Framework, pods, allPodsCPU int, per
framework.ExpectNoError(err)
rc := common.NewDynamicResourceConsumer(rcName, f.Namespace.Name, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f.ClientSet, f.InternalClientset)
rc := common.NewDynamicResourceConsumer(rcName, f.Namespace.Name, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f.ClientSet, f.InternalClientset, f.ScalesGetter)
defer rc.CleanUp()
rc.WaitForReplicas(pods, 15*time.Minute)