vendor update for CSI 0.3.0

This commit is contained in:
gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6,810 changed files with 438,061 additions and 193,861 deletions

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Copyright 2015 The Kubernetes Authors.
#

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Copyright 2015 The Kubernetes Authors.
#

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -ex

View File

@ -1,6 +1,6 @@
options:
channel:
type: string
default: "1.9/stable"
default: "1.10/stable"
description: |
Snap channel to install Kubernetes snaps from

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/local/sbin/charm-env python3
# Copyright 2015 The Kubernetes Authors.
#

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/local/sbin/charm-env python3
import os
from yaml import safe_load as load
from charmhelpers.core.hookenv import (

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set +ex

View File

@ -31,9 +31,17 @@ options:
privileged mode. If "auto", kube-apiserver will not run in privileged
mode by default, but will switch to privileged mode if gpu hardware is
detected on a worker node.
enable-nvidia-plugin:
type: string
default: "auto"
description: |
Load the nvidia device plugin daemonset. Supported values are
"auto" and "false". When "auto", the daemonset will be loaded
only if GPUs are detected. When "false", the nvidia device plugin
will not be loaded.
channel:
type: string
default: "1.9/stable"
default: "1.10/stable"
description: |
Snap channel to install Kubernetes master services from
client_password:
@ -86,3 +94,8 @@ options:
description: |
The storage backend for kube-apiserver persistence. Can be "etcd2", "etcd3", or
"auto". Auto mode will select etcd3 on new installations, or etcd2 on upgrades.
enable-metrics:
type: boolean
default: true
description: |
If true, the metrics server for Kubernetes will be deployed onto the cluster.
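For reference, these two new options surface in the reactive code as ordinary config values. A minimal sketch of reading and normalizing them, mirroring what configure_cdk_addons does later in this commit:

from charmhelpers.core.hookenv import config

# enable-metrics is a boolean option; cdk-addons expects the strings
# 'true'/'false', hence the str().lower() round-trip.
metrics_enabled = str(config('enable-metrics')).lower()

# enable-nvidia-plugin is a string option ("auto" or "false").
nvidia_plugin = config('enable-nvidia-plugin').lower()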

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
MY_HOSTNAME=$(hostname)
: ${JUJU_UNIT_NAME:=`uuidgen`}

View File

@ -15,6 +15,8 @@ includes:
- 'interface:kube-dns'
- 'interface:kube-control'
- 'interface:public-address'
- 'interface:aws'
- 'interface:gcp'
options:
basic:
packages:

View File

@ -40,6 +40,10 @@ requires:
interface: public-address
ceph-storage:
interface: ceph-admin
aws:
interface: aws
gcp:
interface: gcp
resources:
kubectl:
type: file

View File

@ -28,10 +28,12 @@ from charms.leadership import leader_get, leader_set
from shutil import move
from pathlib import Path
from shlex import split
from subprocess import check_call
from subprocess import check_output
from subprocess import CalledProcessError
from urllib.request import Request, urlopen
from charms import layer
from charms.layer import snap
@ -39,7 +41,8 @@ from charms.reactive import hook
from charms.reactive import remove_state
from charms.reactive import set_state
from charms.reactive import is_state
from charms.reactive import when, when_any, when_not
from charms.reactive import endpoint_from_flag
from charms.reactive import when, when_any, when_not, when_none
from charms.reactive.helpers import data_changed, any_file_changed
from charms.kubernetes.common import get_version
from charms.kubernetes.common import retry
@ -60,6 +63,8 @@ from charmhelpers.contrib.charmsupport import nrpe
# default regex in charmhelpers doesn't allow periods, but nagios itself does.
nrpe.Check.shortname_re = '[\.A-Za-z0-9-_]+$'
gcp_creds_env_key = 'GOOGLE_APPLICATION_CREDENTIALS'
os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
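This hunk pulls two newer charms.reactive primitives into the master charm: when_none, which runs a handler only while none of the listed flags are set, and endpoint_from_flag, which returns the endpoint object behind a set flag. A minimal sketch of their semantics, using hypothetical flag names:

from charms.reactive import endpoint_from_flag, when_none, remove_state

@when_none('endpoint.aws.joined', 'endpoint.gcp.joined')
def no_cloud_joined():
    # Runs only when *none* of the listed flags are set.
    remove_state('some-charm.cloud-request-sent')  # hypothetical flag

# Returns the endpoint whose relation set the flag, or None if unset.
cloud = endpoint_from_flag('endpoint.aws.joined')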
@ -228,13 +233,6 @@ def migrate_from_pre_snaps():
os.remove(file)
@when('kubernetes-master.upgrade-needed')
@when_not('kubernetes-master.upgrade-specified')
def upgrade_needed_status():
msg = 'Needs manual upgrade, run the upgrade action'
hookenv.status_set('blocked', msg)
@when('kubernetes-master.upgrade-specified')
def do_upgrade():
install_snaps()
@ -378,8 +376,6 @@ def get_keys_from_leader(keys, overwrite_local=False):
contents = leader_get(k)
# Default to logging the warning and wait for leader data to be set
if contents is None:
msg = "Waiting on leaders crypto keys."
hookenv.status_set('waiting', msg)
hookenv.log('Missing content for file {}'.format(k))
return False
# Write out the file and move on to the next item
@ -397,24 +393,67 @@ def set_app_version():
hookenv.application_version_set(version.split(b' v')[-1].rstrip())
@when('cdk-addons.configured', 'kube-api-endpoint.available',
'kube-control.connected')
@when_not('kubernetes-master.upgrade-needed')
def idle_status(kube_api, kube_control):
''' Signal at the end of the run that we are running. '''
if not all_kube_system_pods_running():
hookenv.status_set('waiting', 'Waiting for kube-system pods to start')
elif hookenv.config('service-cidr') != service_cidr():
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
hookenv.status_set('active', msg)
else:
@hookenv.atexit
def set_final_status():
''' Set the final status of the charm as we leave hook execution '''
if not is_state('kube-api-endpoint.available'):
hookenv.status_set('blocked', 'Waiting for kube-api-endpoint relation')
return
if not is_state('kube-control.connected'):
hookenv.status_set('blocked', 'Waiting for workers.')
return
upgrade_needed = is_state('kubernetes-master.upgrade-needed')
upgrade_specified = is_state('kubernetes-master.upgrade-specified')
if upgrade_needed and not upgrade_specified:
msg = 'Needs manual upgrade, run the upgrade action'
hookenv.status_set('blocked', msg)
return
if is_state('kubernetes-master.components.started'):
# All services should be up and running at this point. Double-check...
failing_services = master_services_down()
if len(failing_services) == 0:
hookenv.status_set('active', 'Kubernetes master running.')
else:
if len(failing_services) != 0:
msg = 'Stopped services: {}'.format(','.join(failing_services))
hookenv.status_set('blocked', msg)
return
is_leader = is_state('leadership.is_leader')
authentication_setup = is_state('authentication.setup')
if not is_leader and not authentication_setup:
hookenv.status_set('waiting', 'Waiting on leaders crypto keys.')
return
components_started = is_state('kubernetes-master.components.started')
addons_configured = is_state('cdk-addons.configured')
if components_started and not addons_configured:
hookenv.status_set('waiting', 'Waiting to retry addon deployment')
return
req_sent = is_state('kubernetes-master.cloud-request-sent')
aws_ready = is_state('endpoint.aws.ready')
gcp_ready = is_state('endpoint.gcp.ready')
if req_sent and not (aws_ready or gcp_ready):
hookenv.status_set('waiting', 'waiting for cloud integration')
if addons_configured and not all_kube_system_pods_running():
hookenv.status_set('waiting', 'Waiting for kube-system pods to start')
return
if hookenv.config('service-cidr') != service_cidr():
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
hookenv.status_set('active', msg)
return
gpu_available = is_state('kube-control.gpu.available')
gpu_enabled = is_state('kubernetes-master.gpu.enabled')
if gpu_available and not gpu_enabled:
msg = 'GPUs available. Set allow-privileged="auto" to enable.'
hookenv.status_set('active', msg)
return
hookenv.status_set('active', 'Kubernetes master running.')
def master_services_down():
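The consolidation above replaces several scattered status handlers with one callback registered via hookenv.atexit, which runs as hook execution ends. The guards are ordered most-blocking first and the first match wins. A minimal sketch of the pattern, with hypothetical flags:

from charmhelpers.core import hookenv
from charms.reactive import is_state

@hookenv.atexit
def set_final_status():
    # Ordered checks; return at the first condition that applies.
    if not is_state('some-relation.available'):  # hypothetical flag
        hookenv.status_set('blocked', 'Waiting for relation')
        return
    if is_state('upgrade.needed'):  # hypothetical flag
        hookenv.status_set('blocked', 'Needs manual upgrade')
        return
    hookenv.status_set('active', 'Running.')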
@ -540,18 +579,6 @@ def create_service_configs(kube_control):
remove_state('authentication.setup')
@when_not('kube-control.connected')
def missing_kube_control():
"""Inform the operator master is waiting for a relation to workers.
If deploying via bundle this won't happen, but if the operator is upgrading
a charm in a deployment that pre-dates the kube-control relation, it'll be
missing.
"""
hookenv.status_set('blocked', 'Waiting for workers.')
@when('kube-api-endpoint.available')
def push_service_data(kube_api):
''' Send configuration to the load balancer, and close access to the
@ -559,9 +586,9 @@ def push_service_data(kube_api):
kube_api.configure(port=6443)
def get_ingress_address(relation):
def get_ingress_address(relation_name):
try:
network_info = hookenv.network_get(relation.relation_name)
network_info = hookenv.network_get(relation_name)
except NotImplementedError:
network_info = []
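get_ingress_address now takes the relation name directly because hookenv.network_get expects a binding/relation name rather than a relation object. A sketch of how the rest of the lookup plausibly proceeds, assuming the usual network_get result shape (a dict with an 'ingress-addresses' list) and the private-address fallback for older Juju:

from charmhelpers.core import hookenv

def get_ingress_address(relation_name):
    try:
        network_info = hookenv.network_get(relation_name)
    except NotImplementedError:
        network_info = []  # Juju too old for network-get
    if network_info and 'ingress-addresses' in network_info:
        # Address other units should use to reach us (assumed shape).
        return network_info['ingress-addresses'][0]
    return hookenv.unit_get('private-address')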
@ -585,7 +612,7 @@ def send_data(tls, kube_api_endpoint):
kubernetes_service_ip = get_kubernetes_service_ip()
# Get ingress address
ingress_ip = get_ingress_address(kube_api_endpoint)
ingress_ip = get_ingress_address(kube_api_endpoint.relation_name)
domain = hookenv.config('dns_domain')
# Create SANs that the tls layer will add to the server cert.
@ -638,18 +665,24 @@ def kick_api_server(tls):
def configure_cdk_addons():
''' Configure CDK addons '''
remove_state('cdk-addons.configured')
load_gpu_plugin = hookenv.config('enable-nvidia-plugin').lower()
gpuEnable = (get_version('kube-apiserver') >= (1, 9) and
load_gpu_plugin == "auto" and
is_state('kubernetes-master.gpu.enabled'))
dbEnabled = str(hookenv.config('enable-dashboard-addons')).lower()
dnsEnabled = str(hookenv.config('enable-kube-dns')).lower()
metricsEnabled = str(hookenv.config('enable-metrics')).lower()
args = [
'arch=' + arch(),
'dns-ip=' + get_deprecated_dns_ip(),
'dns-domain=' + hookenv.config('dns_domain'),
'enable-dashboard=' + dbEnabled,
'enable-kube-dns=' + dnsEnabled
'enable-kube-dns=' + dnsEnabled,
'enable-metrics=' + metricsEnabled,
'enable-gpu=' + str(gpuEnable).lower()
]
check_call(['snap', 'set', 'cdk-addons'] + args)
if not addons_ready():
hookenv.status_set('waiting', 'Waiting to retry addon deployment')
remove_state('cdk-addons.configured')
return
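Each entry in args becomes a key=value setting on the cdk-addons snap. A sketch of the assembled call for a 1.10 deployment with metrics enabled and no GPUs (every value here is illustrative):

from subprocess import check_call

args = [
    'arch=amd64',
    'dns-ip=10.152.183.10',      # illustrative cluster DNS IP
    'dns-domain=cluster.local',
    'enable-dashboard=true',
    'enable-kube-dns=true',
    'enable-metrics=true',
    'enable-gpu=false',
]
# Equivalent to: snap set cdk-addons arch=amd64 ... enable-gpu=false
check_call(['snap', 'set', 'cdk-addons'] + args)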
@ -885,12 +918,10 @@ def on_gpu_available(kube_control):
We need to run in privileged mode.
"""
kube_version = get_version('kube-apiserver')
config = hookenv.config()
if config['allow-privileged'].lower() == "false":
hookenv.status_set(
'active',
'GPUs available. Set allow-privileged="auto" to enable.'
)
if (config['allow-privileged'].lower() == "false" and
kube_version < (1, 9)):
return
remove_state('kubernetes-master.components.started')
@ -898,11 +929,25 @@ def on_gpu_available(kube_control):
@when('kubernetes-master.gpu.enabled')
@when('kubernetes-master.components.started')
@when_not('kubernetes-master.privileged')
def disable_gpu_mode():
def gpu_with_no_privileged():
"""We were in gpu mode, but the operator has set allow-privileged="false",
so we can't run in gpu mode anymore.
"""
if get_version('kube-apiserver') < (1, 9):
remove_state('kubernetes-master.gpu.enabled')
@when('kube-control.connected')
@when_not('kube-control.gpu.available')
@when('kubernetes-master.gpu.enabled')
@when('kubernetes-master.components.started')
def gpu_departed(kube_control):
"""We were in gpu mode, but the workers informed us there is
no gpu support anymore.
"""
remove_state('kubernetes-master.gpu.enabled')
@ -918,24 +963,18 @@ def shutdown():
def restart_apiserver():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-apiserver')
host.service_restart('snap.kube-apiserver.daemon')
hookenv.status_set(prev_state, prev_msg)
def restart_controller_manager():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-controller-manager')
host.service_restart('snap.kube-controller-manager.daemon')
hookenv.status_set(prev_state, prev_msg)
def restart_scheduler():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-scheduler')
host.service_restart('snap.kube-scheduler.daemon')
hookenv.status_set(prev_state, prev_msg)
def arch():
@ -1076,6 +1115,7 @@ def configure_kubernetes_service(service, base_args, extra_args_key):
args = {}
for arg in prev_args:
# remove previous args by setting to null
# note this is so we remove them from the snap's config
args[arg] = 'null'
for k, v in base_args.items():
args[k] = v
@ -1099,6 +1139,14 @@ def configure_apiserver(etcd_connection_string, leader_etcd_version):
server_cert_path = layer_options.get('server_certificate_path')
server_key_path = layer_options.get('server_key_path')
# at one point in time, this code would set ca-client-cert,
# but this was removed. This was before configure_kubernetes_service
# kept track of old arguments and removed them, so client-ca-cert
# was able to hang around forever stored in the snap configuration.
# This removes that stale configuration from the snap if it still
# exists.
api_opts['client-ca-file'] = 'null'
if is_privileged():
api_opts['allow-privileged'] = 'true'
set_state('kubernetes-master.privileged')
@ -1124,6 +1172,7 @@ def configure_apiserver(etcd_connection_string, leader_etcd_version):
api_opts['service-account-key-file'] = '/root/cdk/serviceaccount.key'
api_opts['kubelet-preferred-address-types'] = \
'[InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP]'
api_opts['advertise-address'] = get_ingress_address('kube-control')
etcd_dir = '/root/cdk/etcd'
etcd_ca = os.path.join(etcd_dir, 'client-ca.pem')
@ -1135,7 +1184,7 @@ def configure_apiserver(etcd_connection_string, leader_etcd_version):
api_opts['etcd-certfile'] = etcd_cert
api_opts['etcd-servers'] = etcd_connection_string
admission_control = [
admission_control_pre_1_9 = [
'Initializers',
'NamespaceLifecycle',
'LimitRanger',
@ -1144,19 +1193,54 @@ def configure_apiserver(etcd_connection_string, leader_etcd_version):
'DefaultTolerationSeconds'
]
admission_control = [
'NamespaceLifecycle',
'LimitRanger',
'ServiceAccount',
'PersistentVolumeLabel',
'DefaultStorageClass',
'DefaultTolerationSeconds',
'MutatingAdmissionWebhook',
'ValidatingAdmissionWebhook',
'ResourceQuota'
]
auth_mode = hookenv.config('authorization-mode')
if 'Node' in auth_mode:
admission_control.append('NodeRestriction')
api_opts['authorization-mode'] = auth_mode
if get_version('kube-apiserver') < (1, 6):
kube_version = get_version('kube-apiserver')
if kube_version < (1, 6):
hookenv.log('Removing DefaultTolerationSeconds from admission-control')
admission_control.remove('DefaultTolerationSeconds')
if get_version('kube-apiserver') < (1, 7):
admission_control_pre_1_9.remove('DefaultTolerationSeconds')
if kube_version < (1, 7):
hookenv.log('Removing Initializers from admission-control')
admission_control.remove('Initializers')
api_opts['admission-control'] = ','.join(admission_control)
admission_control_pre_1_9.remove('Initializers')
if kube_version < (1, 9):
api_opts['admission-control'] = ','.join(admission_control_pre_1_9)
else:
api_opts['admission-control'] = ','.join(admission_control)
if kube_version > (1, 6) and \
hookenv.config('enable-metrics'):
api_opts['requestheader-client-ca-file'] = ca_cert_path
api_opts['requestheader-allowed-names'] = 'client'
api_opts['requestheader-extra-headers-prefix'] = 'X-Remote-Extra-'
api_opts['requestheader-group-headers'] = 'X-Remote-Group'
api_opts['requestheader-username-headers'] = 'X-Remote-User'
api_opts['proxy-client-cert-file'] = client_cert_path
api_opts['proxy-client-key-file'] = client_key_path
api_opts['enable-aggregator-routing'] = 'true'
api_opts['client-ca-file'] = ca_cert_path
if is_state('endpoint.aws.ready'):
api_opts['cloud-provider'] = 'aws'
elif is_state('endpoint.gcp.ready'):
cloud_config_path = _cloud_config_path('kube-apiserver')
api_opts['cloud-provider'] = 'gce'
api_opts['cloud-config'] = str(cloud_config_path)
configure_kubernetes_service('kube-apiserver', api_opts, 'api-extra-args')
restart_apiserver()
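Worked example of the version gate: a 1.10 apiserver gets the new admission-control list above, while anything below 1.9 keeps the pre-1.9 list (trimmed further on very old releases). A sketch using the new list as shown:

admission_control = [
    'NamespaceLifecycle', 'LimitRanger', 'ServiceAccount',
    'PersistentVolumeLabel', 'DefaultStorageClass',
    'DefaultTolerationSeconds', 'MutatingAdmissionWebhook',
    'ValidatingAdmissionWebhook', 'ResourceQuota',
]
kube_version = (1, 10)
if kube_version >= (1, 9):
    flag = ','.join(admission_control)
# flag == 'NamespaceLifecycle,LimitRanger,ServiceAccount,...'
# NodeRestriction is appended beforehand when 'Node' appears in the
# authorization-mode config, as in the hunk above.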
@ -1179,6 +1263,13 @@ def configure_controller_manager():
controller_opts['service-account-private-key-file'] = \
'/root/cdk/serviceaccount.key'
if is_state('endpoint.aws.ready'):
controller_opts['cloud-provider'] = 'aws'
elif is_state('endpoint.gcp.ready'):
cloud_config_path = _cloud_config_path('kube-controller-manager')
controller_opts['cloud-provider'] = 'gce'
controller_opts['cloud-config'] = str(cloud_config_path)
configure_kubernetes_service('kube-controller-manager', controller_opts,
'controller-manager-extra-args')
restart_controller_manager()
@ -1278,19 +1369,88 @@ def all_kube_system_pods_running():
try:
output = check_output(cmd).decode('utf-8')
result = json.loads(output)
except CalledProcessError:
hookenv.log('failed to get kube-system pod status')
return False
hookenv.log('Checking system pods status: {}'.format(', '.join(
'='.join([pod['metadata']['name'], pod['status']['phase']])
for pod in result['items'])))
result = json.loads(output)
for pod in result['items']:
status = pod['status']['phase']
# Evicted nodes should re-spawn
if status != 'Running' and \
pod['status'].get('reason', '') != 'Evicted':
return False
all_pending = all(pod['status']['phase'] == 'Pending'
for pod in result['items'])
if is_state('endpoint.gcp.ready') and all_pending:
poke_network_unavailable()
return False
return True
# All pods must be Running or Evicted (which should re-spawn)
all_running = all(pod['status']['phase'] == 'Running' or
pod['status'].get('reason', '') == 'Evicted'
for pod in result['items'])
return all_running
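A tiny worked example of the new tolerant predicate, with made-up pod records: an Evicted pod no longer fails the check, since it is expected to re-spawn:

pods = [
    {'status': {'phase': 'Running'}},
    {'status': {'phase': 'Failed', 'reason': 'Evicted'}},
]
all_running = all(pod['status']['phase'] == 'Running' or
                  pod['status'].get('reason', '') == 'Evicted'
                  for pod in pods)
# True: the evicted pod is tolerated, everything else is Running.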
def poke_network_unavailable():
"""
Work around https://github.com/kubernetes/kubernetes/issues/44254 by
manually poking the status into the API server to tell the nodes they have
a network route.
This is needed because kubelet sets the NetworkUnavailable flag and expects
the network plugin to clear it, which only kubenet does. There is some
discussion about refactoring the affected code but nothing has happened
in a while.
"""
cmd = ['kubectl', 'get', 'nodes', '-o', 'json']
try:
output = check_output(cmd).decode('utf-8')
nodes = json.loads(output)['items']
except CalledProcessError:
hookenv.log('failed to get kube-system nodes')
return
except (KeyError, json.JSONDecodeError) as e:
hookenv.log('failed to parse kube-system node status '
'({}): {}'.format(e, output), hookenv.ERROR)
return
for node in nodes:
node_name = node['metadata']['name']
url = 'http://localhost:8080/api/v1/nodes/{}/status'.format(node_name)
with urlopen(url) as response:
code = response.getcode()
body = response.read().decode('utf8')
if code != 200:
hookenv.log('failed to get node status from {} [{}]: {}'.format(
url, code, body), hookenv.ERROR)
return
try:
node_info = json.loads(body)
conditions = node_info['status']['conditions']
i = [c['type'] for c in conditions].index('NetworkUnavailable')
if conditions[i]['status'] == 'True':
hookenv.log('Clearing NetworkUnavailable from {}'.format(
node_name))
conditions[i] = {
"type": "NetworkUnavailable",
"status": "False",
"reason": "RouteCreated",
"message": "Manually set through k8s api",
}
req = Request(url, method='PUT',
data=json.dumps(node_info).encode('utf8'),
headers={'Content-Type': 'application/json'})
with urlopen(req) as response:
code = response.getcode()
body = response.read().decode('utf8')
if code not in (200, 201, 202):
hookenv.log('failed to update node status [{}]: {}'.format(
code, body), hookenv.ERROR)
return
except (json.JSONDecodeError, KeyError):
hookenv.log('failed to parse node status: {}'.format(body),
hookenv.ERROR)
return
def apiserverVersion():
@ -1311,3 +1471,124 @@ def getStorageBackend():
if storage_backend == 'auto':
storage_backend = leader_get('auto_storage_backend')
return storage_backend
@when('leadership.is_leader')
@when_not('leadership.set.cluster_tag')
def create_cluster_tag():
cluster_tag = 'kubernetes-{}'.format(token_generator().lower())
leader_set(cluster_tag=cluster_tag)
@when('leadership.set.cluster_tag',
'kube-control.connected')
@when_not('kubernetes-master.cluster-tag-sent')
def send_cluster_tag():
cluster_tag = leader_get('cluster_tag')
kube_control = endpoint_from_flag('kube-control.connected')
kube_control.set_cluster_tag(cluster_tag)
set_state('kubernetes-master.cluster-tag-sent')
@when_not('kube-control.connected')
def clear_cluster_tag_sent():
remove_state('kubernetes-master.cluster-tag-sent')
@when_any('endpoint.aws.joined',
'endpoint.gcp.joined')
@when('leadership.set.cluster_tag')
@when_not('kubernetes-master.cloud-request-sent')
def request_integration():
hookenv.status_set('maintenance', 'requesting cloud integration')
cluster_tag = leader_get('cluster_tag')
if is_state('endpoint.aws.joined'):
cloud = endpoint_from_flag('endpoint.aws.joined')
cloud.tag_instance({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
'k8s.io/role/master': 'true',
})
cloud.tag_instance_security_group({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
})
cloud.tag_instance_subnet({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
})
cloud.enable_object_storage_management(['kubernetes-*'])
cloud.enable_load_balancer_management()
elif is_state('endpoint.gcp.joined'):
cloud = endpoint_from_flag('endpoint.gcp.joined')
cloud.label_instance({
'k8s-io-cluster-name': cluster_tag,
'k8s-io-role-master': 'master',
})
cloud.enable_object_storage_management()
cloud.enable_security_management()
cloud.enable_instance_inspection()
cloud.enable_network_management()
cloud.enable_dns_management()
cloud.enable_block_storage_management()
set_state('kubernetes-master.cloud-request-sent')
@when_none('endpoint.aws.joined',
'endpoint.gcp.joined')
@when('kubernetes-master.cloud-request-sent')
def clear_requested_integration():
remove_state('kubernetes-master.cloud-request-sent')
@when_any('endpoint.aws.ready',
'endpoint.gcp.ready')
@when_not('kubernetes-master.restarted-for-cloud')
def restart_for_cloud():
if is_state('endpoint.gcp.ready'):
_write_gcp_snap_config('kube-apiserver')
_write_gcp_snap_config('kube-controller-manager')
set_state('kubernetes-master.restarted-for-cloud')
remove_state('kubernetes-master.components.started') # force restart
def _snap_common_path(component):
return Path('/var/snap/{}/common'.format(component))
def _cloud_config_path(component):
return _snap_common_path(component) / 'cloud-config.conf'
def _gcp_creds_path(component):
return _snap_common_path(component) / 'gcp-creds.json'
def _daemon_env_path(component):
return _snap_common_path(component) / 'environment'
def _write_gcp_snap_config(component):
# gcp requires additional credentials setup
gcp = endpoint_from_flag('endpoint.gcp.ready')
creds_path = _gcp_creds_path(component)
with creds_path.open('w') as fp:
os.fchmod(fp.fileno(), 0o600)
fp.write(gcp.credentials)
# create a cloud-config file that sets token-url to nil to make the
# services use the creds env var instead of the metadata server, as
# well as making the cluster multizone
cloud_config_path = _cloud_config_path(component)
cloud_config_path.write_text('[Global]\n'
'token-url = nil\n'
'multizone = true\n')
daemon_env_path = _daemon_env_path(component)
if daemon_env_path.exists():
daemon_env = daemon_env_path.read_text()
if not daemon_env.endswith('\n'):
daemon_env += '\n'
else:
daemon_env = ''
if gcp_creds_env_key not in daemon_env:
daemon_env += '{}={}\n'.format(gcp_creds_env_key, creds_path)
daemon_env_path.parent.mkdir(parents=True, exist_ok=True)
daemon_env_path.write_text(daemon_env)
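For a component such as kube-apiserver, the helper leaves three files under the snap's common directory. A sketch of inspecting the expected results (paths follow the helpers above; the credentials body comes from the gcp relation):

from pathlib import Path

common = Path('/var/snap/kube-apiserver/common')
# gcp-creds.json is written mode 0600 with the relation's credentials.
print((common / 'cloud-config.conf').read_text())
# [Global]
# token-url = nil
# multizone = true
print((common / 'environment').read_text())
# GOOGLE_APPLICATION_CREDENTIALS=/var/snap/kube-apiserver/common/gcp-creds.json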

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/local/sbin/charm-env python3
# Copyright 2015 The Kubernetes Authors.
#
@ -21,7 +21,7 @@ from charmhelpers.core.hookenv import action_get
from charmhelpers.core.hookenv import action_set
from charmhelpers.core.hookenv import unit_public_ip
from charms.templating.jinja2 import render
from subprocess import call
from subprocess import call, check_output
os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
@ -30,6 +30,9 @@ context['replicas'] = action_get('replicas')
context['delete'] = action_get('delete')
context['public_address'] = unit_public_ip()
arch = check_output(['dpkg', '--print-architecture']).rstrip()
context['arch'] = arch.decode('utf-8')
if not context['replicas']:
context['replicas'] = 3

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -ex

View File

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/local/sbin/charm-env python3
#
# For a usage examples, see README.md
#
@ -25,7 +25,7 @@ from base64 import b64encode
from charmhelpers.core.hookenv import action_get
from charmhelpers.core.hookenv import action_set
from charms.templating.jinja2 import render
from subprocess import call
from subprocess import call, check_output
os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
@ -33,6 +33,9 @@ deletion = action_get('delete')
context = {}
arch = check_output(['dpkg', '--print-architecture']).rstrip()
context['arch'] = arch.decode('utf-8')
# These config options must be defined in the case of a creation
param_error = False
for param in ('tlscert', 'tlskey', 'domain', 'htpasswd', 'htpasswd-plain'):

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -ex

View File

@ -22,7 +22,7 @@ options:
switch to privileged mode if gpu hardware is detected.
channel:
type: string
default: "1.9/stable"
default: "1.10/stable"
description: |
Snap channel to install Kubernetes worker services from
require-manual-upgrade:

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
MY_HOSTNAME=$(hostname)
: ${JUJU_UNIT_NAME:=`uuidgen`}

View File

@ -7,12 +7,14 @@ includes:
- 'layer:metrics'
- 'layer:nagios'
- 'layer:tls-client'
- 'layer:nvidia-cuda'
- 'layer:cdk-service-kicker'
- 'interface:http'
- 'interface:kubernetes-cni'
- 'interface:kube-dns'
- 'interface:kube-control'
- 'interface:aws'
- 'interface:gcp'
- 'interface:mount'
config:
deletes:
- install_from_upstream

View File

@ -7,6 +7,7 @@ maintainers:
- Konstantinos Tsakalozos <kos.tsakalozos@canonical.com>
- Charles Butler <Chuck@dasroot.net>
- Matthew Bruzek <mbruzek@ubuntu.com>
- Mike Wilson <mike.wilson@canonical.com>
description: |
Kubernetes is an open-source platform for deploying, scaling, and operating
application containers across a cluster of hosts. Kubernetes is portable
@ -28,6 +29,12 @@ requires:
interface: kube-dns
kube-control:
interface: kube-control
aws:
interface: aws
gcp:
interface: gcp
nfs:
interface: mount
provides:
cni:
interface: kubernetes-cni
@ -37,6 +44,10 @@ resources:
type: file
filename: cni.tgz
description: CNI plugins for amd64
cni-arm64:
type: file
filename: cni.tgz
description: CNI plugins for arm64
cni-s390x:
type: file
filename: cni.tgz

View File

@ -21,6 +21,7 @@ import shutil
import subprocess
import time
from pathlib import Path
from shlex import split
from subprocess import check_call, check_output
from subprocess import CalledProcessError
@ -29,8 +30,9 @@ from socket import gethostname, getfqdn
from charms import layer
from charms.layer import snap
from charms.reactive import hook
from charms.reactive import endpoint_from_flag
from charms.reactive import set_state, remove_state, is_state
from charms.reactive import when, when_any, when_not
from charms.reactive import when, when_any, when_not, when_none
from charms.kubernetes.common import get_version
@ -49,6 +51,7 @@ nrpe.Check.shortname_re = '[\.A-Za-z0-9-_]+$'
kubeconfig_path = '/root/cdk/kubeconfig'
kubeproxyconfig_path = '/root/cdk/kubeproxyconfig'
kubeclientconfig_path = '/root/.kube/config'
gcp_creds_env_key = 'GOOGLE_APPLICATION_CREDENTIALS'
os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
db = unitdata.kv()
@ -69,7 +72,14 @@ def upgrade_charm():
# Remove gpu.enabled state so we can reconfigure gpu-related kubelet flags,
# since they can differ between k8s versions
remove_state('kubernetes-worker.gpu.enabled')
if is_state('kubernetes-worker.gpu.enabled'):
remove_state('kubernetes-worker.gpu.enabled')
try:
disable_gpu()
except ApplyNodeLabelFailed:
# Removing node label failed. Probably the master is unavailable.
# Proceed with the upgrade in hope GPUs will still be there.
hookenv.log('Failed to remove GPU labels. Proceed with upgrade.')
remove_state('kubernetes-worker.cni-plugins.installed')
remove_state('kubernetes-worker.config.created')
@ -356,9 +366,6 @@ def start_worker(kube_api, kube_control, auth_control, cni):
creds = db.get('credentials')
data_changed('kube-control.creds', creds)
# set --allow-privileged flag for kubelet
set_privileged()
create_config(random.choice(servers), creds)
configure_kubelet(dns, ingress_ip)
configure_kube_proxy(servers, cluster_cidr)
@ -610,16 +617,21 @@ def configure_kubelet(dns, ingress_ip):
if (dns['enable-kube-dns']):
kubelet_opts['cluster-dns'] = dns['sdn-ip']
privileged = is_state('kubernetes-worker.privileged')
kubelet_opts['allow-privileged'] = 'true' if privileged else 'false'
# set --allow-privileged flag for kubelet
kubelet_opts['allow-privileged'] = set_privileged()
if is_state('kubernetes-worker.gpu.enabled'):
if get_version('kubelet') < (1, 6):
hookenv.log('Adding --experimental-nvidia-gpus=1 to kubelet')
kubelet_opts['experimental-nvidia-gpus'] = '1'
else:
hookenv.log('Adding --feature-gates=Accelerators=true to kubelet')
kubelet_opts['feature-gates'] = 'Accelerators=true'
hookenv.log('Adding '
'--feature-gates=DevicePlugins=true '
'to kubelet')
kubelet_opts['feature-gates'] = 'DevicePlugins=true'
if is_state('endpoint.aws.ready'):
kubelet_opts['cloud-provider'] = 'aws'
elif is_state('endpoint.gcp.ready'):
cloud_config_path = _cloud_config_path('kubelet')
kubelet_opts['cloud-provider'] = 'gce'
kubelet_opts['cloud-config'] = str(cloud_config_path)
configure_kubernetes_service('kubelet', kubelet_opts, 'kubelet-extra-args')
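Putting the kubelet changes together: on a 1.10 worker with GPUs enabled and the AWS integrator related, the relevant kubelet_opts entries would end up roughly as below (a sketch; other options omitted):

kubelet_opts = {
    # set_privileged(): 'auto' no longer forces privileged on >= 1.9
    'allow-privileged': 'false',
    # >= 1.9 uses the device plugin framework instead of Accelerators
    'feature-gates': 'DevicePlugins=true',
    # present while the endpoint.aws.ready flag is held
    'cloud-provider': 'aws',
}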
@ -705,6 +717,9 @@ def launch_default_ingress_controller():
if context['arch'] == 's390x':
context['defaultbackend_image'] = \
"k8s.gcr.io/defaultbackend-s390x:1.4"
elif context['arch'] == 'arm64':
context['defaultbackend_image'] = \
"k8s.gcr.io/defaultbackend-arm64:1.4"
else:
context['defaultbackend_image'] = \
"k8s.gcr.io/defaultbackend:1.4"
@ -728,9 +743,16 @@ def launch_default_ingress_controller():
if context['arch'] == 's390x':
context['ingress_image'] = \
"docker.io/cdkbot/nginx-ingress-controller-s390x:0.9.0-beta.13"
elif context['arch'] == 'arm64':
context['ingress_image'] = \
"k8s.gcr.io/nginx-ingress-controller-arm64:0.9.0-beta.15"
else:
context['ingress_image'] = \
"k8s.gcr.io/nginx-ingress-controller:0.9.0-beta.15" # noqa
"k8s.gcr.io/nginx-ingress-controller:0.9.0-beta.15" # noqa
if get_version('kubelet') < (1, 9):
context['daemonset_api_version'] = 'extensions/v1beta1'
else:
context['daemonset_api_version'] = 'apps/v1beta2'
context['juju_application'] = hookenv.service_name()
manifest = addon_path.format('ingress-daemon-set.yaml')
render('ingress-daemon-set.yaml', manifest, context)
@ -847,18 +869,25 @@ def remove_nrpe_config(nagios=None):
def set_privileged():
"""Update the allow-privileged flag for kubelet.
"""Return 'true' if privileged containers are needed.
This is when a) the user requested them, or
b) the user does not care (auto) and GPUs are available in a
pre-1.9 era.
"""
privileged = hookenv.config('allow-privileged').lower()
if privileged == 'auto':
gpu_enabled = is_state('kubernetes-worker.gpu.enabled')
privileged = 'true' if gpu_enabled else 'false'
gpu_needs_privileged = (is_state('kubernetes-worker.gpu.enabled') and
get_version('kubelet') < (1, 9))
if privileged == 'true':
set_state('kubernetes-worker.privileged')
else:
remove_state('kubernetes-worker.privileged')
if privileged == 'auto':
privileged = 'true' if gpu_needs_privileged else 'false'
if privileged == 'false' and gpu_needs_privileged:
disable_gpu()
remove_state('kubernetes-worker.gpu.enabled')
# No need to restart kubernetes (set the restart-needed state)
# because set-privileged is already in the restart path
return privileged
@when('config.changed.allow-privileged')
@ -871,18 +900,17 @@ def on_config_allow_privileged_change():
remove_state('config.changed.allow-privileged')
@when('cuda.installed')
@when('nvidia-docker.installed')
@when('kubernetes-worker.config.created')
@when_not('kubernetes-worker.gpu.enabled')
def enable_gpu():
"""Enable GPU usage on this node.
"""
config = hookenv.config()
if config['allow-privileged'] == "false":
if get_version('kubelet') < (1, 9):
hookenv.status_set(
'active',
'GPUs available. Set allow-privileged="auto" to enable.'
'Upgrade to snap channel >= 1.9/stable to enable GPU support.'
)
return
@ -897,7 +925,6 @@ def enable_gpu():
hookenv.log(cpe)
return
# Apply node labels
set_label('gpu', 'true')
set_label('cuda', 'true')
@ -906,15 +933,19 @@ def enable_gpu():
@when('kubernetes-worker.gpu.enabled')
@when_not('kubernetes-worker.privileged')
@when_not('nvidia-docker.installed')
@when_not('kubernetes-worker.restart-needed')
def nvidia_departed():
"""Cuda departed, probably due to the docker layer switching to a
non nvidia-docker."""
disable_gpu()
remove_state('kubernetes-worker.gpu.enabled')
set_state('kubernetes-worker.restart-needed')
def disable_gpu():
"""Disable GPU usage on this node.
This handler fires when we're running in gpu mode, and then the operator
sets allow-privileged="false". Since we can no longer run privileged
containers, we need to disable gpu mode.
"""
hookenv.log('Disabling gpu mode')
@ -922,9 +953,6 @@ def disable_gpu():
remove_label('gpu')
remove_label('cuda')
remove_state('kubernetes-worker.gpu.enabled')
set_state('kubernetes-worker.restart-needed')
@when('kubernetes-worker.gpu.enabled')
@when('kube-control.connected')
@ -1009,6 +1037,10 @@ def _systemctl_is_active(application):
def get_node_name():
kubelet_extra_args = parse_extra_args('kubelet-extra-args')
cloud_provider = kubelet_extra_args.get('cloud-provider', '')
if is_state('endpoint.aws.ready'):
cloud_provider = 'aws'
elif is_state('endpoint.gcp.ready'):
cloud_provider = 'gcp'
if cloud_provider == 'aws':
return getfqdn()
else:
@ -1049,3 +1081,158 @@ def remove_label(label):
retry = 'Failed to remove label {0}. Will retry.'.format(label)
if not persistent_call(cmd, retry):
raise ApplyNodeLabelFailed(retry)
@when_any('endpoint.aws.joined',
'endpoint.gcp.joined')
@when('kube-control.cluster_tag.available')
@when_not('kubernetes-worker.cloud-request-sent')
def request_integration():
hookenv.status_set('maintenance', 'requesting cloud integration')
kube_control = endpoint_from_flag('kube-control.cluster_tag.available')
cluster_tag = kube_control.get_cluster_tag()
if is_state('endpoint.aws.joined'):
cloud = endpoint_from_flag('endpoint.aws.joined')
cloud.tag_instance({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
})
cloud.tag_instance_security_group({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
})
cloud.tag_instance_subnet({
'kubernetes.io/cluster/{}'.format(cluster_tag): 'owned',
})
cloud.enable_object_storage_management(['kubernetes-*'])
elif is_state('endpoint.gcp.joined'):
cloud = endpoint_from_flag('endpoint.gcp.joined')
cloud.label_instance({
'k8s-io-cluster-name': cluster_tag,
})
cloud.enable_object_storage_management()
cloud.enable_instance_inspection()
cloud.enable_dns_management()
set_state('kubernetes-worker.cloud-request-sent')
hookenv.status_set('waiting', 'waiting for cloud integration')
@when_none('endpoint.aws.joined',
'endpoint.gcp.joined')
def clear_requested_integration():
remove_state('kubernetes-worker.cloud-request-sent')
@when_any('endpoint.aws.ready',
'endpoint.gcp.ready')
@when_not('kubernetes-worker.restarted-for-cloud')
def restart_for_cloud():
if is_state('endpoint.gcp.ready'):
_write_gcp_snap_config('kubelet')
set_state('kubernetes-worker.restarted-for-cloud')
set_state('kubernetes-worker.restart-needed')
def _snap_common_path(component):
return Path('/var/snap/{}/common'.format(component))
def _cloud_config_path(component):
return _snap_common_path(component) / 'cloud-config.conf'
def _gcp_creds_path(component):
return _snap_common_path(component) / 'gcp-creds.json'
def _daemon_env_path(component):
return _snap_common_path(component) / 'environment'
def _write_gcp_snap_config(component):
# gcp requires additional credentials setup
gcp = endpoint_from_flag('endpoint.gcp.ready')
creds_path = _gcp_creds_path(component)
with creds_path.open('w') as fp:
os.fchmod(fp.fileno(), 0o600)
fp.write(gcp.credentials)
# create a cloud-config file that sets token-url to nil to make the
# services use the creds env var instead of the metadata server, as
# well as making the cluster multizone
cloud_config_path = _cloud_config_path(component)
cloud_config_path.write_text('[Global]\n'
'token-url = nil\n'
'multizone = true\n')
daemon_env_path = _daemon_env_path(component)
if daemon_env_path.exists():
daemon_env = daemon_env_path.read_text()
if not daemon_env.endswith('\n'):
daemon_env += '\n'
else:
daemon_env = ''
if gcp_creds_env_key not in daemon_env:
daemon_env += '{}={}\n'.format(gcp_creds_env_key, creds_path)
daemon_env_path.parent.mkdir(parents=True, exist_ok=True)
daemon_env_path.write_text(daemon_env)
def get_first_mount(mount_relation):
mount_relation_list = mount_relation.mounts()
if mount_relation_list and len(mount_relation_list) > 0:
# mount relation list is a list of the mount layer relations
# for now we just use the first one that is nfs
for mount in mount_relation_list:
# for now we just check the first mount and use that.
# the nfs charm only supports one for now.
if ('mounts' in mount and
mount['mounts'][0]['fstype'] == 'nfs'):
return mount['mounts'][0]
return None
@when('nfs.available')
def nfs_state_control(mount):
''' Determine whether to clear the state that gates re-rendering and
execution of the nfs-relation-changed handler: if the relation data
has changed, the configs should be re-rendered. '''
mount_data = get_first_mount(mount)
if mount_data:
nfs_relation_data = {
'options': mount_data['options'],
'host': mount_data['hostname'],
'mountpoint': mount_data['mountpoint'],
'fstype': mount_data['fstype']
}
# Re-execute the rendering if the data has changed.
if data_changed('nfs-config', nfs_relation_data):
hookenv.log('reconfiguring nfs')
remove_state('nfs.configured')
@when('nfs.available')
@when_not('nfs.configured')
def nfs_storage(mount):
'''NFS on kubernetes requires nfs config rendered into a deployment of
the nfs client provisioner. That will handle the persistent volume claims
with no persistent volume to back them.'''
mount_data = get_first_mount(mount)
if not mount_data:
return
addon_path = '/root/cdk/addons/{}'
# Render the NFS deployment
manifest = addon_path.format('nfs-provisioner.yaml')
render('nfs-provisioner.yaml', manifest, mount_data)
hookenv.log('Creating the nfs provisioner.')
try:
kubectl('apply', '-f', manifest)
except CalledProcessError as e:
hookenv.log(e)
hookenv.log('Failed to create nfs provisioner. Will attempt again next update.') # noqa
return
set_state('nfs.configured')
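The mount_data dict returned by get_first_mount feeds the nfs-provisioner template directly. A sketch with hypothetical relation values:

from charms.templating.jinja2 import render

mount_data = {  # hypothetical values from the nfs relation
    'fstype': 'nfs',
    'hostname': '10.0.0.5',
    'mountpoint': '/srv/nfs',
    'options': 'rw',
}
render('nfs-provisioner.yaml', '/root/cdk/addons/nfs-provisioner.yaml',
       mount_data)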

View File

@ -127,7 +127,7 @@ kind: ConfigMap
metadata:
name: nginx-load-balancer-{{ juju_application }}-conf
---
apiVersion: apps/v1beta2
apiVersion: {{ daemonset_api_version }}
kind: DaemonSet
metadata:
name: nginx-ingress-{{ juju_application }}-controller
@ -152,7 +152,6 @@ spec:
containers:
- image: {{ ingress_image }}
name: nginx-ingress-{{ juju_application }}
imagePullPolicy: Always
livenessProbe:
httpGet:
path: /healthz

View File

@ -18,7 +18,7 @@ spec:
app: microbot
spec:
containers:
- image: dontrebootme/microbot:v1
- image: cdkbot/microbot-{{ arch }}:latest
imagePullPolicy: ""
name: microbot
ports:

View File

@ -0,0 +1,39 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: default
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: fuseim.pri/ifs
---
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: nfs-client-provisioner
spec:
  replicas: 1
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      containers:
        - name: nfs-client-provisioner
          image: quay.io/external_storage/nfs-client-provisioner:latest
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: fuseim.pri/ifs
            - name: NFS_SERVER
              value: {{ hostname }}
            - name: NFS_PATH
              value: {{ mountpoint }}
      volumes:
        - name: nfs-client-root
          nfs:
            server: {{ hostname }}
            path: {{ mountpoint }}

View File

@ -37,7 +37,7 @@ spec:
spec:
containers:
- name: registry
image: registry:2
image: cdkbot/registry-{{ arch }}:2.6
resources:
# keep request = limit to keep this container in guaranteed class
limits:

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Copyright 2015 The Kubernetes Authors.
#

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Copyright 2015 The Kubernetes Authors.
#