2019-05-31 09:45:11 +00:00
|
|
|
/*
|
|
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package util
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"time"
|
|
|
|
|
2020-01-14 10:38:55 +00:00
|
|
|
"k8s.io/component-base/metrics"
|
|
|
|
"k8s.io/component-base/metrics/legacyregistry"
|
2019-05-31 09:45:11 +00:00
|
|
|
"k8s.io/kubernetes/pkg/volume"
|
|
|
|
)
|
|
|
|
|
2019-06-04 07:09:45 +00:00
|
|
|
// Values used for the "status" label of storageOperationStatusMetric.
const (
	// statusFailUnknown is reported when the operation returned an error;
	// no finer-grained failure classification is made.
	statusFailUnknown = "fail-unknown"
	// statusSuccess is reported when the operation returned no error.
	statusSuccess = "success"
)
|
|
|
|
|
2020-01-14 10:38:55 +00:00
|
|
|
/*
|
|
|
|
* By default, all the following metrics are defined as falling under
|
|
|
|
* ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/20190404-kubernetes-control-plane-metrics-stability.md#stability-classes)
|
|
|
|
*
|
|
|
|
* Promoting the stability level of the metric is a responsibility of the component owner, since it
|
|
|
|
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
|
|
|
|
* the metric stability policy.
|
|
|
|
*/
|
|
|
|
var storageOperationMetric = metrics.NewHistogramVec(
|
|
|
|
&metrics.HistogramOpts{
|
|
|
|
Name: "storage_operation_duration_seconds",
|
|
|
|
Help: "Storage operation duration",
|
|
|
|
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
|
|
|
|
StabilityLevel: metrics.ALPHA,
|
2019-05-31 09:45:11 +00:00
|
|
|
},
|
|
|
|
[]string{"volume_plugin", "operation_name"},
|
|
|
|
)
|
|
|
|
|
2020-01-14 10:38:55 +00:00
|
|
|
var storageOperationErrorMetric = metrics.NewCounterVec(
|
|
|
|
&metrics.CounterOpts{
|
|
|
|
Name: "storage_operation_errors_total",
|
|
|
|
Help: "Storage operation errors",
|
|
|
|
StabilityLevel: metrics.ALPHA,
|
2019-05-31 09:45:11 +00:00
|
|
|
},
|
|
|
|
[]string{"volume_plugin", "operation_name"},
|
|
|
|
)
|
|
|
|
|
2020-01-14 10:38:55 +00:00
|
|
|
var storageOperationStatusMetric = metrics.NewCounterVec(
|
|
|
|
&metrics.CounterOpts{
|
|
|
|
Name: "storage_operation_status_count",
|
|
|
|
Help: "Storage operation return statuses count",
|
|
|
|
StabilityLevel: metrics.ALPHA,
|
2019-06-04 07:09:45 +00:00
|
|
|
},
|
|
|
|
[]string{"volume_plugin", "operation_name", "status"},
|
|
|
|
)
|
|
|
|
|
2020-01-14 10:38:55 +00:00
|
|
|
var storageOperationEndToEndLatencyMetric = metrics.NewHistogramVec(
|
|
|
|
&metrics.HistogramOpts{
|
|
|
|
Name: "volume_operation_total_seconds",
|
|
|
|
Help: "Storage operation end to end duration in seconds",
|
|
|
|
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
|
|
|
|
StabilityLevel: metrics.ALPHA,
|
2019-06-24 09:08:09 +00:00
|
|
|
},
|
|
|
|
[]string{"plugin_name", "operation_name"},
|
|
|
|
)
|
|
|
|
|
2019-05-31 09:45:11 +00:00
|
|
|
func init() {
|
|
|
|
registerMetrics()
|
|
|
|
}
|
|
|
|
|
|
|
|
func registerMetrics() {
|
2020-01-14 10:38:55 +00:00
|
|
|
// legacyregistry is the internal k8s wrapper around the prometheus
|
|
|
|
// global registry, used specifically for metric stability enforcement
|
|
|
|
legacyregistry.MustRegister(storageOperationMetric)
|
|
|
|
legacyregistry.MustRegister(storageOperationErrorMetric)
|
|
|
|
legacyregistry.MustRegister(storageOperationStatusMetric)
|
|
|
|
legacyregistry.MustRegister(storageOperationEndToEndLatencyMetric)
|
2019-05-31 09:45:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// OperationCompleteHook returns a hook to call when an operation is completed
|
|
|
|
func OperationCompleteHook(plugin, operationName string) func(*error) {
|
|
|
|
requestTime := time.Now()
|
|
|
|
opComplete := func(err *error) {
|
|
|
|
timeTaken := time.Since(requestTime).Seconds()
|
|
|
|
// Create metric with operation name and plugin name
|
2019-06-04 07:09:45 +00:00
|
|
|
status := statusSuccess
|
2019-05-31 09:45:11 +00:00
|
|
|
if *err != nil {
|
2019-06-04 07:09:45 +00:00
|
|
|
// TODO: Establish well-known error codes to be able to distinguish
|
|
|
|
// user configuration errors from system errors.
|
|
|
|
status = statusFailUnknown
|
2019-05-31 09:45:11 +00:00
|
|
|
storageOperationErrorMetric.WithLabelValues(plugin, operationName).Inc()
|
|
|
|
} else {
|
|
|
|
storageOperationMetric.WithLabelValues(plugin, operationName).Observe(timeTaken)
|
|
|
|
}
|
2019-06-04 07:09:45 +00:00
|
|
|
storageOperationStatusMetric.WithLabelValues(plugin, operationName, status).Inc()
|
2019-05-31 09:45:11 +00:00
|
|
|
}
|
|
|
|
return opComplete
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetFullQualifiedPluginNameForVolume returns full qualified plugin name for
|
|
|
|
// given volume. For CSI plugin, it appends plugin driver name at the end of
|
|
|
|
// plugin name, e.g. kubernetes.io/csi:csi-hostpath. It helps to distinguish
|
|
|
|
// between metrics emitted for CSI volumes which may be handled by different
|
|
|
|
// CSI plugin drivers.
|
|
|
|
func GetFullQualifiedPluginNameForVolume(pluginName string, spec *volume.Spec) string {
|
|
|
|
if spec != nil && spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil {
|
|
|
|
return fmt.Sprintf("%s:%s", pluginName, spec.PersistentVolume.Spec.CSI.Driver)
|
|
|
|
}
|
|
|
|
return pluginName
|
|
|
|
}
|
2019-06-24 09:08:09 +00:00
|
|
|
|
|
|
|
// RecordOperationLatencyMetric records the end to end latency for certain operation
|
|
|
|
// into metric volume_operation_total_seconds
|
|
|
|
func RecordOperationLatencyMetric(plugin, operationName string, secondsTaken float64) {
|
|
|
|
storageOperationEndToEndLatencyMetric.WithLabelValues(plugin, operationName).Observe(secondsTaken)
|
|
|
|
}
|