Update to kube v1.17

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
This commit is contained in:
Humble Chirammal
2020-01-14 16:08:55 +05:30
committed by mergify[bot]
parent 327fcd1b1b
commit 3af1e26d7c
1710 changed files with 289562 additions and 168638 deletions

View File

@ -19,8 +19,9 @@ package persistentvolume
import (
"fmt"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
@ -69,6 +70,16 @@ const (
AnnStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner"
)
// IsDelayBindingProvisioning checks if claim provisioning with selected-node annotation
func IsDelayBindingProvisioning(claim *v1.PersistentVolumeClaim) bool {
// When feature VolumeScheduling enabled,
// Scheduler signal to the PV controller to start dynamic
// provisioning by setting the "AnnSelectedNode" annotation
// in the PVC
_, ok := claim.Annotations[AnnSelectedNode]
return ok
}
// IsDelayBindingMode checks if claim is in delay binding mode.
func IsDelayBindingMode(claim *v1.PersistentVolumeClaim, classLister storagelisters.StorageClassLister) (bool, error) {
className := v1helper.GetPersistentVolumeClaimClass(claim)
@ -78,7 +89,10 @@ func IsDelayBindingMode(claim *v1.PersistentVolumeClaim, classLister storagelist
class, err := classLister.Get(className)
if err != nil {
return false, nil
if apierrors.IsNotFound(err) {
return false, nil
}
return false, err
}
if class.VolumeBindingMode == nil {
@ -193,13 +207,8 @@ func FindMatchingVolume(
volumeQty := volume.Spec.Capacity[v1.ResourceStorage]
// check if volumeModes do not match (feature gate protected)
isMismatch, err := CheckVolumeModeMismatches(&claim.Spec, &volume.Spec)
if err != nil {
return nil, fmt.Errorf("error checking if volumeMode was a mismatch: %v", err)
}
// filter out mismatching volumeModes
if isMismatch {
if CheckVolumeModeMismatches(&claim.Spec, &volume.Spec) {
continue
}
@ -295,9 +304,22 @@ func FindMatchingVolume(
// CheckVolumeModeMismatches is a convenience method that checks volumeMode for PersistentVolume
// and PersistentVolumeClaims
func CheckVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) (bool, error) {
func CheckVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) bool {
if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
return false, nil
if pvcSpec.VolumeMode != nil && *pvcSpec.VolumeMode == v1.PersistentVolumeBlock {
// Block PVC does not match anything when the feature is off. We explicitly want
// to prevent binding block PVC to filesystem PV.
// The PVC should be ignored by PV controller.
return true
}
if pvSpec.VolumeMode != nil && *pvSpec.VolumeMode == v1.PersistentVolumeBlock {
// Block PV does not match anything when the feature is off. We explicitly want
// to prevent binding block PV to filesystem PVC.
// The PV should be ignored by PV controller.
return true
}
// Both PV + PVC are not block.
return false
}
// In HA upgrades, we cannot guarantee that the apiserver is on a version >= controller-manager.
@ -310,7 +332,7 @@ func CheckVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1
if pvSpec.VolumeMode != nil {
pvVolumeMode = *pvSpec.VolumeMode
}
return requestedVolumeMode != pvVolumeMode, nil
return requestedVolumeMode != pvVolumeMode
}
// CheckAccessModes returns true if PV satisfies all the PVC's requested AccessModes

View File

@ -14,10 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
// VolumeSchedulerSubsystem - subsystem name used by scheduler
@ -25,30 +26,33 @@ const VolumeSchedulerSubsystem = "scheduler_volume"
var (
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "binder_cache_requests_total",
Help: "Total number for request volume binding cache",
VolumeBindingRequestSchedulerBinderCache = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "binder_cache_requests_total",
Help: "Total number for request volume binding cache",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)
// VolumeSchedulingStageLatency tracks the latency of volume scheduling operations.
VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_duration_seconds",
Help: "Volume scheduling stage latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
VolumeSchedulingStageLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_duration_seconds",
Help: "Volume scheduling stage latency",
Buckets: metrics.ExponentialBuckets(1000, 2, 15),
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
VolumeSchedulingStageFailed = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_stage_error_total",
Help: "Volume scheduling stage error count",
VolumeSchedulingStageFailed = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: VolumeSchedulerSubsystem,
Name: "scheduling_stage_error_total",
Help: "Volume scheduling stage error count",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)
@ -57,7 +61,7 @@ var (
// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
// used by scheduler process.
func RegisterVolumeSchedulingMetrics() {
prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache)
prometheus.MustRegister(VolumeSchedulingStageLatency)
prometheus.MustRegister(VolumeSchedulingStageFailed)
legacyregistry.MustRegister(VolumeBindingRequestSchedulerBinderCache)
legacyregistry.MustRegister(VolumeSchedulingStageLatency)
legacyregistry.MustRegister(VolumeSchedulingStageFailed)
}

View File

@ -185,8 +185,11 @@ func (c *assumeCache) add(obj interface{}) {
}
objInfo := &objInfo{name: name, latestObj: obj, apiObj: obj}
c.store.Update(objInfo)
klog.V(10).Infof("Adding %v %v to assume cache: %+v ", c.description, name, obj)
if err = c.store.Update(objInfo); err != nil {
klog.Warningf("got error when updating stored object : %v", err)
} else {
klog.V(10).Infof("Adding %v %v to assume cache: %+v ", c.description, name, obj)
}
}
func (c *assumeCache) update(oldObj interface{}, newObj interface{}) {
@ -396,6 +399,7 @@ func (c *pvAssumeCache) ListPVs(storageClassName string) []*v1.PersistentVolume
pv, ok := obj.(*v1.PersistentVolume)
if !ok {
klog.Errorf("ListPVs: %v", &errWrongType{"v1.PersistentVolume", obj})
continue
}
pvs = append(pvs, pv)
}

View File

@ -19,23 +19,39 @@ package scheduling
import (
"fmt"
"sort"
"strings"
"time"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage/etcd"
"k8s.io/apiserver/pkg/storage/etcd3"
utilfeature "k8s.io/apiserver/pkg/util/feature"
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
storagelisters "k8s.io/client-go/listers/storage/v1"
csitrans "k8s.io/csi-translation-lib"
csiplugins "k8s.io/csi-translation-lib/plugins"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pvutil "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/util"
"k8s.io/kubernetes/pkg/controller/volume/scheduling/metrics"
"k8s.io/kubernetes/pkg/features"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// InTreeToCSITranslator contains methods required to check migratable status
// and perform translations from InTree PV's to CSI
type InTreeToCSITranslator interface {
IsPVMigratable(pv *v1.PersistentVolume) bool
GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}
// SchedulerVolumeBinder is used by the scheduler to handle PVC/PV binding
// and dynamic provisioning. The binding decisions are integrated into the pod scheduling
// workflow so that the PV NodeAffinity is also considered along with the pod's other
@ -102,9 +118,10 @@ type volumeBinder struct {
kubeClient clientset.Interface
classLister storagelisters.StorageClassLister
nodeInformer coreinformers.NodeInformer
pvcCache PVCAssumeCache
pvCache PVAssumeCache
nodeInformer coreinformers.NodeInformer
csiNodeInformer storageinformers.CSINodeInformer
pvcCache PVCAssumeCache
pvCache PVAssumeCache
// Stores binding decisions that were made in FindPodVolumes for use in AssumePodVolumes.
// AssumePodVolumes modifies the bindings again for use in BindPodVolumes.
@ -112,12 +129,15 @@ type volumeBinder struct {
// Amount of time to wait for the bind operation to succeed
bindTimeout time.Duration
translator InTreeToCSITranslator
}
// NewVolumeBinder sets up all the caches needed for the scheduler to make volume binding decisions.
func NewVolumeBinder(
kubeClient clientset.Interface,
nodeInformer coreinformers.NodeInformer,
csiNodeInformer storageinformers.CSINodeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
pvInformer coreinformers.PersistentVolumeInformer,
storageClassInformer storageinformers.StorageClassInformer,
@ -127,10 +147,12 @@ func NewVolumeBinder(
kubeClient: kubeClient,
classLister: storageClassInformer.Lister(),
nodeInformer: nodeInformer,
csiNodeInformer: csiNodeInformer,
pvcCache: NewPVCAssumeCache(pvcInformer.Informer()),
pvCache: NewPVAssumeCache(pvInformer.Informer()),
podBindingCache: NewPodBindingCache(),
bindTimeout: bindTimeout,
translator: csitrans.New(),
}
return b
@ -154,9 +176,9 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
boundVolumesSatisfied = true
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
metrics.VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
metrics.VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
}
}()
@ -256,9 +278,9 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
klog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName)
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
metrics.VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
metrics.VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
}
}()
@ -332,9 +354,9 @@ func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
metrics.VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
metrics.VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
}
}()
@ -347,10 +369,14 @@ func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
return err
}
return wait.Poll(time.Second, b.bindTimeout, func() (bool, error) {
err = wait.Poll(time.Second, b.bindTimeout, func() (bool, error) {
b, err := b.checkBindings(assumedPod, bindings, claimsToProvision)
return b, err
})
if err != nil {
return fmt.Errorf("Failed to bind volumes: %v", err)
}
return nil
}
func getPodName(pod *v1.Pod) string {
@ -425,7 +451,7 @@ func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, cl
}
var (
versioner = etcd.APIObjectVersioner{}
versioner = etcd3.APIObjectVersioner{}
)
// checkBindings runs through all the PVCs in the Pod and checks:
@ -452,6 +478,12 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
return false, fmt.Errorf("failed to get node %q: %v", pod.Spec.NodeName, err)
}
csiNode, err := b.csiNodeInformer.Lister().Get(node.Name)
if err != nil {
// TODO: return the error once CSINode is created by default
klog.V(4).Infof("Could not get a CSINode object for the node %q: %v", node.Name, err)
}
// Check for any conditions that might require scheduling retry
// When pod is removed from scheduling queue because of deletion or any
@ -480,6 +512,11 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
return false, nil
}
pv, err = b.tryTranslatePVToCSI(pv, csiNode)
if err != nil {
return false, fmt.Errorf("failed to translate pv to csi: %v", err)
}
// Check PV's node affinity (the node might not have the proper label)
if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
@ -514,7 +551,10 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
}
selectedNode := pvc.Annotations[pvutil.AnnSelectedNode]
if selectedNode != pod.Spec.NodeName {
return false, fmt.Errorf("selectedNode annotation value %q not set to scheduled node %q", selectedNode, pod.Spec.NodeName)
// If provisioner fails to provision a volume, selectedNode
// annotation will be removed to signal back to the scheduler to
// retry.
return false, fmt.Errorf("provisioning failed for PVC %q", pvc.Name)
}
// If the PVC is bound to a PV, check its node affinity
@ -530,6 +570,12 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
}
return false, fmt.Errorf("failed to get pv %q from cache: %v", pvc.Spec.VolumeName, err)
}
pv, err = b.tryTranslatePVToCSI(pv, csiNode)
if err != nil {
return false, err
}
if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
}
@ -633,6 +679,12 @@ func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentV
}
func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node *v1.Node, podName string) (bool, error) {
csiNode, err := b.csiNodeInformer.Lister().Get(node.Name)
if err != nil {
// TODO: return the error once CSINode is created by default
klog.V(4).Infof("Could not get a CSINode object for the node %q: %v", node.Name, err)
}
for _, pvc := range claims {
pvName := pvc.Spec.VolumeName
pv, err := b.pvCache.GetPV(pvName)
@ -640,6 +692,11 @@ func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node
return false, err
}
pv, err = b.tryTranslatePVToCSI(pv, csiNode)
if err != nil {
return false, err
}
err = volumeutil.CheckNodeAffinity(pv, node.Labels)
if err != nil {
klog.V(4).Infof("PersistentVolume %q, Node %q mismatch for Pod %q: %v", pvName, node.Name, podName, err)
@ -665,11 +722,7 @@ func (b *volumeBinder) findMatchingVolumes(pod *v1.Pod, claimsToBind []*v1.Persi
for _, pvc := range claimsToBind {
// Get storage class name from each PVC
storageClassName := ""
storageClass := pvc.Spec.StorageClassName
if storageClass != nil {
storageClassName = *storageClass
}
storageClassName := v1helper.GetPersistentVolumeClaimClass(pvc)
allPVs := b.pvCache.ListPVs(storageClassName)
pvcName := getPVCName(pvc)
@ -779,3 +832,72 @@ func (a byPVCSize) Less(i, j int) bool {
func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
return fmt.Sprintf("%s/%s", claim.Namespace, claim.Name)
}
// isCSIMigrationOnForPlugin checks if CSI migrartion is enabled for a given plugin.
func isCSIMigrationOnForPlugin(pluginName string) bool {
switch pluginName {
case csiplugins.AWSEBSInTreePluginName:
return utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAWS)
case csiplugins.GCEPDInTreePluginName:
return utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationGCE)
case csiplugins.AzureDiskInTreePluginName:
return utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAzureDisk)
case csiplugins.CinderInTreePluginName:
return utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationOpenStack)
}
return false
}
// isPluginMigratedToCSIOnNode checks if an in-tree plugin has been migrated to a CSI driver on the node.
func isPluginMigratedToCSIOnNode(pluginName string, csiNode *storagev1.CSINode) bool {
if csiNode == nil {
return false
}
csiNodeAnn := csiNode.GetAnnotations()
if csiNodeAnn == nil {
return false
}
var mpaSet sets.String
mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
if len(mpa) == 0 {
mpaSet = sets.NewString()
} else {
tok := strings.Split(mpa, ",")
mpaSet = sets.NewString(tok...)
}
return mpaSet.Has(pluginName)
}
// tryTranslatePVToCSI will translate the in-tree PV to CSI if it meets the criteria. If not, it returns the unmodified in-tree PV.
func (b *volumeBinder) tryTranslatePVToCSI(pv *v1.PersistentVolume, csiNode *storagev1.CSINode) (*v1.PersistentVolume, error) {
if !b.translator.IsPVMigratable(pv) {
return pv, nil
}
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigration) {
return pv, nil
}
pluginName, err := b.translator.GetInTreePluginNameFromSpec(pv, nil)
if err != nil {
return nil, fmt.Errorf("could not get plugin name from pv: %v", err)
}
if !isCSIMigrationOnForPlugin(pluginName) {
return pv, nil
}
if !isPluginMigratedToCSIOnNode(pluginName, csiNode) {
return pv, nil
}
transPV, err := b.translator.TranslateInTreePVToCSI(pv)
if err != nil {
return nil, fmt.Errorf("could not translate pv: %v", err)
}
return transPV, nil
}

View File

@ -19,7 +19,8 @@ package scheduling
import (
"sync"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/controller/volume/scheduling/metrics"
)
// PodBindingCache stores PV binding decisions per pod per node.
@ -93,7 +94,7 @@ func (c *podBindingCache) DeleteBindings(pod *v1.Pod) {
if _, ok := c.bindingDecisions[podName]; ok {
delete(c.bindingDecisions, podName)
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
metrics.VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
}
}
@ -113,7 +114,7 @@ func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*b
bindings: bindings,
provisionings: pvcs,
}
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
metrics.VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
} else {
decision.bindings = bindings
decision.provisionings = pvcs