vendor updates

Serguei Bezverkhi
2018-03-06 17:33:18 -05:00
parent 4b3ebc171b
commit e9033989a0
5854 changed files with 248382 additions and 119809 deletions

View File

@ -9,3 +9,7 @@ options:
description: |
Space-separated list of extra SAN entries to add to the x509 certificate
created for the load balancers.
proxy_read_timeout:
type: int
default: 90
description: Timeout in seconds for reading a response from proxy server.

View File

@ -21,6 +21,7 @@ import subprocess
from charms import layer
from charms.reactive import when, when_any, when_not
from charms.reactive import set_state, remove_state
from charms.reactive import hook
from charmhelpers.core import hookenv
from charmhelpers.core import host
from charmhelpers.contrib.charmsupport import nrpe
@ -35,8 +36,43 @@ from subprocess import STDOUT
from subprocess import CalledProcessError
@when('certificates.available')
def request_server_certificates(tls):
apilb_nginx = """/var/log/nginx.*.log {
daily
missingok
rotate 14
compress
delaycompress
notifempty
create 0640 www-data adm
sharedscripts
prerotate
if [ -d /etc/logrotate.d/httpd-prerotate ]; then \\
run-parts /etc/logrotate.d/httpd-prerotate; \\
fi \\
endscript
postrotate
invoke-rc.d nginx rotate >/dev/null 2>&1
endscript
}"""
def get_ingress_address(relation):
try:
network_info = hookenv.network_get(relation.relation_name)
except NotImplementedError:
network_info = []
if network_info and 'ingress-addresses' in network_info:
# just grab the first one for now, maybe be more robust here?
return network_info['ingress-addresses'][0]
else:
# if they don't have ingress-addresses they are running a juju that
# doesn't support spaces, so just return the private address
return hookenv.unit_get('private-address')
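For context, a hedged illustration of the payload shape this relies on: hookenv.network_get() on a spaces-aware Juju returns a dict with an 'ingress-addresses' list. The address below is made up, and real payloads carry additional keys that are not used here.

example_network_info = {
    'ingress-addresses': ['10.0.0.42'],   # illustrative address only
}
# get_ingress_address() would return '10.0.0.42' for this payload; on an
# older Juju, network_get() raises NotImplementedError and the unit's
# private-address is returned instead.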
@when('certificates.available', 'website.available')
def request_server_certificates(tls, website):
'''Send the data that is required to create a server certificate for
this server.'''
# Use the public ip of this unit as the Common Name for the certificate.
@ -44,7 +80,7 @@ def request_server_certificates(tls):
# Create SANs that the tls layer will add to the server cert.
sans = [
hookenv.unit_public_ip(),
hookenv.unit_private_ip(),
get_ingress_address(website),
socket.gethostname(),
]
# maybe they have extra names they want as SANs
@ -57,12 +93,13 @@ def request_server_certificates(tls):
tls.request_server_cert(common_name, sans, certificate_name)
@when('config.changed.extra_sans', 'certificates.available')
def update_certificate(tls):
@when('config.changed.extra_sans', 'certificates.available',
'website.available')
def update_certificate(tls, website):
# Using the config.changed.extra_sans flag to catch changes.
# IP changes will take ~5 minutes or so to propagate, but
# it will update.
request_server_certificates(tls)
request_server_certificates(tls, website)
@when('certificates.server.cert.available',
@ -89,6 +126,14 @@ def close_old_port():
hookenv.log('Port %d already closed, skipping.' % old_port)
def maybe_write_apilb_logrotate_config():
filename = '/etc/logrotate.d/apilb_nginx'
if not os.path.exists(filename):
# Set log rotation for apilb log file
with open(filename, 'w+') as fp:
fp.write(apilb_nginx)
@when('nginx.available', 'apiserver.available',
'certificates.server.cert.available')
def install_load_balancer(apiserver, tls):
@ -122,10 +167,18 @@ def install_load_balancer(apiserver, tls):
port=port,
server_certificate=server_cert_path,
server_key=server_key_path,
proxy_read_timeout=hookenv.config('proxy_read_timeout')
)
maybe_write_apilb_logrotate_config()
hookenv.status_set('active', 'Loadbalancer ready.')
@hook('upgrade-charm')
def upgrade_charm():
maybe_write_apilb_logrotate_config()
@when('nginx.available')
def set_nginx_version():
''' Surface the currently deployed version of nginx to Juju '''

View File

@ -36,6 +36,6 @@ server {
add_header X-Stream-Protocol-Version $upstream_http_x_stream_protocol_version;
proxy_pass https://target_service;
proxy_read_timeout 90;
proxy_read_timeout {{ proxy_read_timeout }};
}
}
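Not necessarily how this charm wires it up, but a minimal sketch (the template and target names are made up) of how the new proxy_read_timeout option can be substituted into a template like this one using charmhelpers' render helper:

from charmhelpers.core import hookenv
from charmhelpers.core.templating import render

# With the default config value of 90, the rendered site file ends up with
# the directive "proxy_read_timeout 90;".
render('apilb.conf',                       # hypothetical template name
       '/etc/nginx/sites-enabled/apilb',   # hypothetical target path
       {'proxy_read_timeout': hookenv.config('proxy_read_timeout')})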

View File

@ -1,6 +1,6 @@
options:
channel:
type: string
default: "1.8/stable"
default: "1.9/stable"
description: |
Snap channel to install Kubernetes snaps from

View File

@ -48,7 +48,7 @@ def report_status():
def messaging():
''' Probe our relations to determine the propper messaging to the
''' Probe our relations to determine the proper messaging to the
end user '''
missing_services = []

View File

@ -60,7 +60,7 @@ def main():
if not validate_space(context['RBD_SIZE']):
return
# Ensure our paramters match
# Ensure our parameters match
param_validation = validate_parameters(context['RBD_NAME'],
context['RBD_FS'],
context['PV_MODE'])

View File

@ -3,6 +3,10 @@ options:
type: boolean
default: True
description: Deploy the Kubernetes Dashboard and Heapster addons
enable-kube-dns:
type: boolean
default: True
description: Deploy kube-dns addon
dns_domain:
type: string
default: cluster.local
@ -29,7 +33,7 @@ options:
detected on a worker node.
channel:
type: string
default: "1.8/stable"
default: "1.9/stable"
description: |
Snap channel to install Kubernetes master services from
client_password:
@ -76,3 +80,9 @@ options:
description: |
When true, master nodes will not be upgraded until the user triggers
it manually by running the upgrade action.
storage-backend:
type: string
default: "auto"
description: |
The storage backend for kube-apiserver persistence. Can be "etcd2", "etcd3", or
"auto". Auto mode will select etcd3 on new installations, or etcd2 on upgrades.

View File

@ -24,7 +24,7 @@ import string
import json
import ipaddress
import charms.leadership
from charms.leadership import leader_get, leader_set
from shutil import move
@ -39,7 +39,7 @@ from charms.reactive import hook
from charms.reactive import remove_state
from charms.reactive import set_state
from charms.reactive import is_state
from charms.reactive import when, when_any, when_not, when_all
from charms.reactive import when, when_any, when_not
from charms.reactive.helpers import data_changed, any_file_changed
from charms.kubernetes.common import get_version
from charms.kubernetes.common import retry
@ -63,13 +63,13 @@ nrpe.Check.shortname_re = '[\.A-Za-z0-9-_]+$'
os.environ['PATH'] += os.pathsep + os.path.join(os.sep, 'snap', 'bin')
def set_upgrade_needed():
def set_upgrade_needed(forced=False):
set_state('kubernetes-master.upgrade-needed')
config = hookenv.config()
previous_channel = config.previous('channel')
require_manual = config.get('require-manual-upgrade')
hookenv.log('set upgrade needed')
if previous_channel is None or not require_manual:
if previous_channel is None or not require_manual or forced:
hookenv.log('forcing upgrade')
set_state('kubernetes-master.upgrade-specified')
@ -102,12 +102,45 @@ def check_for_upgrade_needed():
add_rbac_roles()
set_state('reconfigure.authentication.setup')
remove_state('authentication.setup')
changed = snap_resources_changed()
if changed == 'yes':
set_upgrade_needed()
elif changed == 'unknown':
# We are here on an upgrade from non-rolling master
# Since this upgrade might also include resource updates eg
# juju upgrade-charm kubernetes-master --resource kube-any=my.snap
# we take no risk and forcibly upgrade the snaps.
# Forcibly means we do not prompt the user to call the upgrade action.
set_upgrade_needed(forced=True)
# Set the auto storage backend to etcd2.
auto_storage_backend = leader_get('auto_storage_backend')
is_leader = is_state('leadership.is_leader')
if not auto_storage_backend and is_leader:
leader_set(auto_storage_backend='etcd2')
def snap_resources_changed():
'''
Check if the snapped resources have changed. The first time this method is
called it will report "unknown".
Returns: "yes" if a snap resource file has changed,
"no" if the snap resources are the same as in the last call,
"unknown" if this is the first time this method is called
'''
db = unitdata.kv()
resources = ['kubectl', 'kube-apiserver', 'kube-controller-manager',
'kube-scheduler', 'cdk-addons']
paths = [hookenv.resource_get(resource) for resource in resources]
if any_file_changed(paths):
set_upgrade_needed()
if db.get('snap.resources.fingerprint.initialised'):
result = 'yes' if any_file_changed(paths) else 'no'
return result
else:
db.set('snap.resources.fingerprint.initialised', True)
any_file_changed(paths)
return 'unknown'
def add_rbac_roles():
@ -222,6 +255,7 @@ def install_snaps():
snap.install('kube-scheduler', channel=channel)
hookenv.status_set('maintenance', 'Installing cdk-addons snap')
snap.install('cdk-addons', channel=channel)
snap_resources_changed()
set_state('kubernetes-master.snaps.installed')
remove_state('kubernetes-master.components.started')
@ -236,12 +270,17 @@ def password_changed():
elif password == "":
# Password not initialised
password = token_generator()
setup_basic_auth(password, "admin", "admin")
setup_basic_auth(password, "admin", "admin", "system:masters")
set_state('reconfigure.authentication.setup')
remove_state('authentication.setup')
set_state('client.password.initialised')
@when('config.changed.storage-backend')
def storage_backend_changed():
remove_state('kubernetes-master.components.started')
@when('cni.connected')
@when_not('cni.configured')
def configure_cni(cni):
@ -288,7 +327,7 @@ def setup_leader_authentication():
# path as a key.
# eg:
# {'/root/cdk/serviceaccount.key': 'RSA:2471731...'}
charms.leadership.leader_set(leader_data)
leader_set(leader_data)
remove_state('kubernetes-master.components.started')
set_state('authentication.setup')
@ -336,7 +375,7 @@ def get_keys_from_leader(keys, overwrite_local=False):
# If the path does not exist, assume we need it
if not os.path.exists(k) or overwrite_local:
# Fetch data from leadership broadcast
contents = charms.leadership.leader_get(k)
contents = leader_get(k)
# Default to logging the warning and wait for leader data to be set
if contents is None:
msg = "Waiting on leaders crypto keys."
@ -360,6 +399,7 @@ def set_app_version():
@when('cdk-addons.configured', 'kube-api-endpoint.available',
'kube-control.connected')
@when_not('kubernetes-master.upgrade-needed')
def idle_status(kube_api, kube_control):
''' Signal at the end of the run that we are running. '''
if not all_kube_system_pods_running():
@ -394,6 +434,7 @@ def master_services_down():
@when('etcd.available', 'tls_client.server.certificate.saved',
'authentication.setup')
@when('leadership.set.auto_storage_backend')
@when_not('kubernetes-master.components.started')
def start_master(etcd):
'''Run the Kubernetes master components.'''
@ -411,10 +452,10 @@ def start_master(etcd):
handle_etcd_relation(etcd)
# Add CLI options to all components
configure_apiserver(etcd)
configure_apiserver(etcd.get_connection_string(), getStorageBackend())
configure_controller_manager()
configure_scheduler()
set_state('kubernetes-master.components.started')
hookenv.open_port(6443)
@ -422,7 +463,7 @@ def start_master(etcd):
def etcd_data_change(etcd):
''' Etcd scale events block master reconfiguration due to the
kubernetes-master.components.started state. We need a way to
handle these events consistenly only when the number of etcd
handle these events consistently only when the number of etcd
units has actually changed '''
# key off of the connection string
@ -433,15 +474,31 @@ def etcd_data_change(etcd):
if data_changed('etcd-connect', connection_string):
remove_state('kubernetes-master.components.started')
# We are the leader and the auto_storage_backend is not set meaning
# this is the first time we connect to etcd.
auto_storage_backend = leader_get('auto_storage_backend')
is_leader = is_state('leadership.is_leader')
if is_leader and not auto_storage_backend:
if etcd.get_version().startswith('3.'):
leader_set(auto_storage_backend='etcd3')
else:
leader_set(auto_storage_backend='etcd2')
@when('kube-control.connected')
@when('cdk-addons.configured')
def send_cluster_dns_detail(kube_control):
''' Send cluster DNS info '''
# Note that the DNS server doesn't necessarily exist at this point. We know
# where we're going to put it, though, so let's send the info anyway.
dns_ip = get_dns_ip()
kube_control.set_dns(53, hookenv.config('dns_domain'), dns_ip)
enableKubeDNS = hookenv.config('enable-kube-dns')
dnsDomain = hookenv.config('dns_domain')
dns_ip = None
if enableKubeDNS:
try:
dns_ip = get_dns_ip()
except CalledProcessError:
hookenv.log("kubedns not ready yet")
return
kube_control.set_dns(53, dnsDomain, dns_ip, enableKubeDNS)
@when('kube-control.connected')
@ -502,8 +559,23 @@ def push_service_data(kube_api):
kube_api.configure(port=6443)
@when('certificates.available')
def send_data(tls):
def get_ingress_address(relation):
try:
network_info = hookenv.network_get(relation.relation_name)
except NotImplementedError:
network_info = []
if network_info and 'ingress-addresses' in network_info:
# just grab the first one for now, maybe be more robust here?
return network_info['ingress-addresses'][0]
else:
# if they don't have ingress-addresses they are running a juju that
# doesn't support spaces, so just return the private address
return hookenv.unit_get('private-address')
@when('certificates.available', 'kube-api-endpoint.available')
def send_data(tls, kube_api_endpoint):
'''Send the data that is required to create a server certificate for
this server.'''
# Use the public ip of this unit as the Common Name for the certificate.
@ -512,11 +584,14 @@ def send_data(tls):
# Get the SDN gateway based on the cidr address.
kubernetes_service_ip = get_kubernetes_service_ip()
# Get ingress address
ingress_ip = get_ingress_address(kube_api_endpoint)
domain = hookenv.config('dns_domain')
# Create SANs that the tls layer will add to the server cert.
sans = [
hookenv.unit_public_ip(),
hookenv.unit_private_ip(),
ingress_ip,
socket.gethostname(),
kubernetes_service_ip,
'kubernetes',
@ -537,12 +612,13 @@ def send_data(tls):
tls.request_server_cert(common_name, sans, certificate_name)
@when('config.changed.extra_sans', 'certificates.available')
def update_certificate(tls):
@when('config.changed.extra_sans', 'certificates.available',
'kube-api-endpoint.available')
def update_certificate(tls, kube_api_endpoint):
# Using the config.changed.extra_sans flag to catch changes.
# IP changes will take ~5 minutes or so to propagate, but
# it will update.
send_data(tls)
send_data(tls, kube_api_endpoint)
@when('certificates.server.cert.available',
@ -554,7 +630,7 @@ def kick_api_server(tls):
if data_changed('cert', tls.get_server_cert()):
# certificate changed, so restart the api server
hookenv.log("Certificate information changed, restarting api server")
set_state('kube-apiserver.do-restart')
restart_apiserver()
tls_client.reset_certificate_write_flag('server')
@ -563,11 +639,13 @@ def configure_cdk_addons():
''' Configure CDK addons '''
remove_state('cdk-addons.configured')
dbEnabled = str(hookenv.config('enable-dashboard-addons')).lower()
dnsEnabled = str(hookenv.config('enable-kube-dns')).lower()
args = [
'arch=' + arch(),
'dns-ip=' + get_dns_ip(),
'dns-ip=' + get_deprecated_dns_ip(),
'dns-domain=' + hookenv.config('dns_domain'),
'enable-dashboard=' + dbEnabled
'enable-dashboard=' + dbEnabled,
'enable-kube-dns=' + dnsEnabled
]
check_call(['snap', 'set', 'cdk-addons'] + args)
if not addons_ready():
@ -691,7 +769,7 @@ def ceph_storage(ceph_admin):
cmd = ['kubectl', 'apply', '-f', '/tmp/ceph-secret.yaml']
check_call(cmd)
os.remove('/tmp/ceph-secret.yaml')
except: # NOQA
except: # NOQA
# the enlistment in kubernetes failed, return and prepare for re-exec
return
@ -760,7 +838,7 @@ def is_privileged():
"""Return boolean indicating whether or not to set allow-privileged=true.
"""
privileged = hookenv.config('allow-privileged')
privileged = hookenv.config('allow-privileged').lower()
if privileged == 'auto':
return is_state('kubernetes-master.gpu.enabled')
else:
@ -779,9 +857,11 @@ def on_config_allow_privileged_change():
@when('config.changed.api-extra-args')
@when('kubernetes-master.components.started')
@when('leadership.set.auto_storage_backend')
@when('etcd.available')
def on_config_api_extra_args_change(etcd):
configure_apiserver(etcd)
configure_apiserver(etcd.get_connection_string(),
getStorageBackend())
@when('config.changed.controller-manager-extra-args')
@ -806,7 +886,7 @@ def on_gpu_available(kube_control):
"""
config = hookenv.config()
if config['allow-privileged'] == "false":
if config['allow-privileged'].lower() == "false":
hookenv.status_set(
'active',
'GPUs available. Set allow-privileged="auto" to enable.'
@ -837,42 +917,25 @@ def shutdown():
service_stop('snap.kube-scheduler.daemon')
@when('kube-apiserver.do-restart')
def restart_apiserver():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-apiserver')
host.service_restart('snap.kube-apiserver.daemon')
hookenv.status_set(prev_state, prev_msg)
remove_state('kube-apiserver.do-restart')
set_state('kube-apiserver.started')
@when('kube-controller-manager.do-restart')
def restart_controller_manager():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-controller-manager')
host.service_restart('snap.kube-controller-manager.daemon')
hookenv.status_set(prev_state, prev_msg)
remove_state('kube-controller-manager.do-restart')
set_state('kube-controller-manager.started')
@when('kube-scheduler.do-restart')
def restart_scheduler():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-scheduler')
host.service_restart('snap.kube-scheduler.daemon')
hookenv.status_set(prev_state, prev_msg)
remove_state('kube-scheduler.do-restart')
set_state('kube-scheduler.started')
@when_all('kube-apiserver.started',
'kube-controller-manager.started',
'kube-scheduler.started')
@when_not('kubernetes-master.components.started')
def componenets_started():
set_state('kubernetes-master.components.started')
def arch():
@ -951,9 +1014,16 @@ def create_kubeconfig(kubeconfig, server, ca, key=None, certificate=None,
def get_dns_ip():
'''Get an IP address for the DNS server on the provided cidr.'''
cmd = "kubectl get service --namespace kube-system kube-dns --output json"
output = check_output(cmd, shell=True).decode()
svc = json.loads(output)
return svc['spec']['clusterIP']
def get_deprecated_dns_ip():
'''We previously hardcoded the dns ip. This function returns the old
hardcoded value for use with older versions of cdk_addons.'''
interface = ipaddress.IPv4Interface(service_cidr())
# Add .10 at the end of the network
ip = interface.network.network_address + 10
return ip.exploded
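A quick worked example of that arithmetic (the 10.152.183.0/24 service CIDR is an assumed value, not taken from this diff):

import ipaddress

interface = ipaddress.IPv4Interface('10.152.183.0/24')  # assumed service_cidr()
ip = interface.network.network_address + 10
print(ip.exploded)  # -> 10.152.183.10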
@ -1018,7 +1088,7 @@ def configure_kubernetes_service(service, base_args, extra_args_key):
db.set(prev_args_key, args)
def configure_apiserver(etcd):
def configure_apiserver(etcd_connection_string, leader_etcd_version):
api_opts = {}
# Get the tls paths from the layer data.
@ -1048,11 +1118,12 @@ def configure_apiserver(etcd):
api_opts['logtostderr'] = 'true'
api_opts['insecure-bind-address'] = '127.0.0.1'
api_opts['insecure-port'] = '8080'
api_opts['storage-backend'] = 'etcd2' # FIXME: add etcd3 support
api_opts['storage-backend'] = leader_etcd_version
api_opts['basic-auth-file'] = '/root/cdk/basic_auth.csv'
api_opts['token-auth-file'] = '/root/cdk/known_tokens.csv'
api_opts['service-account-key-file'] = '/root/cdk/serviceaccount.key'
api_opts['kubelet-preferred-address-types'] = \
'[InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP]'
etcd_dir = '/root/cdk/etcd'
etcd_ca = os.path.join(etcd_dir, 'client-ca.pem')
@ -1062,7 +1133,7 @@ def configure_apiserver(etcd):
api_opts['etcd-cafile'] = etcd_ca
api_opts['etcd-keyfile'] = etcd_key
api_opts['etcd-certfile'] = etcd_cert
api_opts['etcd-servers'] = etcd.get_connection_string()
api_opts['etcd-servers'] = etcd_connection_string
admission_control = [
'Initializers',
@ -1088,8 +1159,7 @@ def configure_apiserver(etcd):
api_opts['admission-control'] = ','.join(admission_control)
configure_kubernetes_service('kube-apiserver', api_opts, 'api-extra-args')
set_state('kube-apiserver.do-restart')
restart_apiserver()
def configure_controller_manager():
@ -1099,7 +1169,7 @@ def configure_controller_manager():
layer_options = layer.options('tls-client')
ca_cert_path = layer_options.get('ca_certificate_path')
# Default to 3 minute resync. TODO: Make this configureable?
# Default to 3 minute resync. TODO: Make this configurable?
controller_opts['min-resync-period'] = '3m'
controller_opts['v'] = '2'
controller_opts['root-ca-file'] = ca_cert_path
@ -1111,8 +1181,7 @@ def configure_controller_manager():
configure_kubernetes_service('kube-controller-manager', controller_opts,
'controller-manager-extra-args')
set_state('kube-controller-manager.do-restart')
restart_controller_manager()
def configure_scheduler():
@ -1125,7 +1194,7 @@ def configure_scheduler():
configure_kubernetes_service('kube-scheduler', scheduler_opts,
'scheduler-extra-args')
set_state('kube-scheduler.do-restart')
restart_scheduler()
def setup_basic_auth(password=None, username='admin', uid='admin',
@ -1216,7 +1285,9 @@ def all_kube_system_pods_running():
result = json.loads(output)
for pod in result['items']:
status = pod['status']['phase']
if status != 'Running':
# Evicted nodes should re-spawn
if status != 'Running' and \
pod['status'].get('reason', '') != 'Evicted':
return False
return True
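To make the new Evicted exception concrete, a small self-contained check against made-up pod objects shaped like the kubectl JSON output used above (only the second pod blocks readiness):

pods = [
    {'status': {'phase': 'Failed', 'reason': 'Evicted'}},  # tolerated
    {'status': {'phase': 'Pending'}},                      # still blocks
]

def pod_ok(pod):
    status = pod['status']['phase']
    return status == 'Running' or pod['status'].get('reason', '') == 'Evicted'

assert pod_ok(pods[0]) is True
assert pod_ok(pods[1]) is False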
@ -1233,3 +1304,10 @@ def touch(fname):
os.utime(fname, None)
except OSError:
open(fname, 'a').close()
def getStorageBackend():
storage_backend = hookenv.config('storage-backend')
if storage_backend == 'auto':
storage_backend = leader_get('auto_storage_backend')
return storage_backend

View File

@ -22,7 +22,7 @@ options:
switch to privileged mode if gpu hardware is detected.
channel:
type: string
default: "1.8/stable"
default: "1.9/stable"
description: |
Snap channel to install Kubernetes worker services from
require-manual-upgrade:
@ -49,3 +49,24 @@ options:
runtime-config=batch/v2alpha1=true profiling=true
will result in kube-apiserver being run with the following options:
--runtime-config=batch/v2alpha1=true --profiling=true
docker-logins:
type: string
default: "[]"
description: |
Docker login credentials. Setting this config allows Kubelet to pull images from
registries where auth is required.
The value for this config must be a JSON array of credential objects, like this:
[{"server": "my.registry", "username": "myUser", "password": "myPass"}]
nginx-image:
type: string
default: "auto"
description: |
Docker image to use for the nginx ingress controller. Auto will select an image
based on architecture.
default-backend-image:
type: string
default: "auto"
description: |
Docker image to use for the default backend. Auto will select an image
based on architecture.

View File

@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import random
import shutil
@ -23,7 +24,7 @@ import time
from shlex import split
from subprocess import check_call, check_output
from subprocess import CalledProcessError
from socket import gethostname
from socket import gethostname, getfqdn
from charms import layer
from charms.layer import snap
@ -62,6 +63,10 @@ def upgrade_charm():
cleanup_pre_snap_services()
check_resources_for_upgrade_needed()
# Remove the RC for nginx ingress if it exists
if hookenv.config().get('ingress'):
kubectl_success('delete', 'rc', 'nginx-ingress-controller')
# Remove gpu.enabled state so we can reconfigure gpu-related kubelet flags,
# since they can differ between k8s versions
remove_state('kubernetes-worker.gpu.enabled')
@ -69,6 +74,7 @@ def upgrade_charm():
remove_state('kubernetes-worker.cni-plugins.installed')
remove_state('kubernetes-worker.config.created')
remove_state('kubernetes-worker.ingress.available')
remove_state('worker.auth.bootstrapped')
set_state('kubernetes-worker.restart-needed')
@ -157,7 +163,7 @@ def shutdown():
'''
try:
if os.path.isfile(kubeconfig_path):
kubectl('delete', 'node', gethostname())
kubectl('delete', 'node', gethostname().lower())
except CalledProcessError:
hookenv.log('Failed to unregister node.')
service_stop('snap.kubelet.daemon')
@ -270,17 +276,34 @@ def update_kubelet_status():
hookenv.status_set('waiting', msg)
@when('certificates.available')
def send_data(tls):
def get_ingress_address(relation):
try:
network_info = hookenv.network_get(relation.relation_name)
except NotImplementedError:
network_info = []
if network_info and 'ingress-addresses' in network_info:
# just grab the first one for now, maybe be more robust here?
return network_info['ingress-addresses'][0]
else:
# if they don't have ingress-addresses they are running a juju that
# doesn't support spaces, so just return the private address
return hookenv.unit_get('private-address')
@when('certificates.available', 'kube-control.connected')
def send_data(tls, kube_control):
'''Send the data that is required to create a server certificate for
this server.'''
# Use the public ip of this unit as the Common Name for the certificate.
common_name = hookenv.unit_public_ip()
ingress_ip = get_ingress_address(kube_control)
# Create SANs that the tls layer will add to the server cert.
sans = [
hookenv.unit_public_ip(),
hookenv.unit_private_ip(),
ingress_ip,
gethostname()
]
@ -323,6 +346,7 @@ def start_worker(kube_api, kube_control, auth_control, cni):
# the correct DNS even though the server isn't ready yet.
dns = kube_control.get_dns()
ingress_ip = get_ingress_address(kube_control)
cluster_cidr = cni.get_config()['cidr']
if cluster_cidr is None:
@ -336,12 +360,12 @@ def start_worker(kube_api, kube_control, auth_control, cni):
set_privileged()
create_config(random.choice(servers), creds)
configure_kubelet(dns)
configure_kubelet(dns, ingress_ip)
configure_kube_proxy(servers, cluster_cidr)
set_state('kubernetes-worker.config.created')
restart_unit_services()
update_kubelet_status()
apply_node_labels()
set_state('kubernetes-worker.label-config-required')
remove_state('kubernetes-worker.restart-needed')
@ -372,7 +396,7 @@ def sdn_changed():
@when('kubernetes-worker.config.created')
@when_not('kubernetes-worker.ingress.available')
def render_and_launch_ingress():
''' If configuration has ingress RC enabled, launch the ingress load
''' If configuration has ingress daemon set enabled, launch the ingress load
balancer and default http backend. Otherwise attempt deletion. '''
config = hookenv.config()
# If ingress is enabled, launch the ingress controller
@ -383,50 +407,59 @@ def render_and_launch_ingress():
kubectl_manifest('delete',
'/root/cdk/addons/default-http-backend.yaml')
kubectl_manifest('delete',
'/root/cdk/addons/ingress-replication-controller.yaml') # noqa
'/root/cdk/addons/ingress-daemon-set.yaml') # noqa
hookenv.close_port(80)
hookenv.close_port(443)
@when('kubernetes-worker.ingress.available')
def scale_ingress_controller():
''' Scale the number of ingress controller replicas to match the number of
nodes. '''
try:
output = kubectl('get', 'nodes', '-o', 'name')
count = len(output.splitlines())
kubectl('scale', '--replicas=%d' % count, 'rc/nginx-ingress-controller') # noqa
except CalledProcessError:
hookenv.log('Failed to scale ingress controllers. Will attempt again next update.') # noqa
@when('config.changed.labels')
def handle_labels_changed():
set_state('kubernetes-worker.label-config-required')
@when('config.changed.labels', 'kubernetes-worker.config.created')
@when('kubernetes-worker.label-config-required',
'kubernetes-worker.config.created')
def apply_node_labels():
''' Parse the labels configuration option and apply the labels to the node.
'''
# scrub and try to format an array from the configuration option
''' Parse the labels configuration option and apply the labels to the
node. '''
# Get the user's configured labels.
config = hookenv.config()
user_labels = _parse_labels(config.get('labels'))
# For diffing sake, iterate the previous label set
if config.previous('labels'):
previous_labels = _parse_labels(config.previous('labels'))
hookenv.log('previous labels: {}'.format(previous_labels))
else:
# this handles first time run if there is no previous labels config
previous_labels = _parse_labels("")
# Calculate label removal
for label in previous_labels:
if label not in user_labels:
hookenv.log('Deleting node label {}'.format(label))
_apply_node_label(label, delete=True)
# if the label is in user labels we do nothing here, it will get set
# during the atomic update below.
# Atomically set a label
for label in user_labels:
_apply_node_label(label, overwrite=True)
user_labels = {}
for item in config.get('labels').split(' '):
if '=' in item:
key, val = item.split('=')
user_labels[key] = val
else:
hookenv.log('Skipping malformed option: {}.'.format(item))
# Collect the current label state.
current_labels = db.get('current_labels') or {}
# Remove any labels that the user has removed from the config.
for key in list(current_labels.keys()):
if key not in user_labels:
try:
remove_label(key)
del current_labels[key]
db.set('current_labels', current_labels)
except ApplyNodeLabelFailed as e:
hookenv.log(str(e))
return
# Add any new labels.
for key, val in user_labels.items():
try:
set_label(key, val)
current_labels[key] = val
db.set('current_labels', current_labels)
except ApplyNodeLabelFailed as e:
hookenv.log(str(e))
return
# Set the juju-application label.
try:
set_label('juju-application', hookenv.service_name())
except ApplyNodeLabelFailed as e:
hookenv.log(str(e))
return
# Label configuration complete.
remove_state('kubernetes-worker.label-config-required')
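To illustrate the parsing step above (the label values are made up), a space-separated labels setting splits into a dict and malformed entries are skipped:

labels_config = 'gpu=true zone=us-east oops'  # hypothetical juju config value
user_labels = {}
for item in labels_config.split(' '):
    if '=' in item:
        key, val = item.split('=')
        user_labels[key] = val
    # 'oops' has no '=', so it is logged as malformed and skipped
assert user_labels == {'gpu': 'true', 'zone': 'us-east'}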
@when_any('config.changed.kubelet-extra-args',
@ -435,6 +468,53 @@ def extra_args_changed():
set_state('kubernetes-worker.restart-needed')
@when('config.changed.docker-logins')
def docker_logins_changed():
"""Set a flag to handle new docker login options.
If docker daemon options have also changed, set a flag to ensure the
daemon is restarted prior to running docker login.
"""
config = hookenv.config()
if data_changed('docker-opts', config['docker-opts']):
hookenv.log('Found new docker daemon options. Requesting a restart.')
# State will be removed by layer-docker after restart
set_state('docker.restart')
set_state('kubernetes-worker.docker-login')
@when('kubernetes-worker.docker-login')
@when_not('docker.restart')
def run_docker_login():
"""Login to a docker registry with configured credentials."""
config = hookenv.config()
previous_logins = config.previous('docker-logins')
logins = config['docker-logins']
logins = json.loads(logins)
if previous_logins:
previous_logins = json.loads(previous_logins)
next_servers = {login['server'] for login in logins}
previous_servers = {login['server'] for login in previous_logins}
servers_to_logout = previous_servers - next_servers
for server in servers_to_logout:
cmd = ['docker', 'logout', server]
subprocess.check_call(cmd)
for login in logins:
server = login['server']
username = login['username']
password = login['password']
cmd = ['docker', 'login', server, '-u', username, '-p', password]
subprocess.check_call(cmd)
remove_state('kubernetes-worker.docker-login')
set_state('kubernetes-worker.restart-needed')
def arch():
'''Return the package architecture as a string. Raise an exception if the
architecture is not supported by kubernetes.'''
@ -505,7 +585,7 @@ def configure_kubernetes_service(service, base_args, extra_args_key):
db.set(prev_args_key, args)
def configure_kubelet(dns):
def configure_kubelet(dns, ingress_ip):
layer_options = layer.options('tls-client')
ca_cert_path = layer_options.get('ca_certificate_path')
server_cert_path = layer_options.get('server_certificate_path')
@ -518,7 +598,6 @@ def configure_kubelet(dns):
kubelet_opts['v'] = '0'
kubelet_opts['address'] = '0.0.0.0'
kubelet_opts['port'] = '10250'
kubelet_opts['cluster-dns'] = dns['sdn-ip']
kubelet_opts['cluster-domain'] = dns['domain']
kubelet_opts['anonymous-auth'] = 'false'
kubelet_opts['client-ca-file'] = ca_cert_path
@ -526,6 +605,10 @@ def configure_kubelet(dns):
kubelet_opts['tls-private-key-file'] = server_key_path
kubelet_opts['logtostderr'] = 'true'
kubelet_opts['fail-swap-on'] = 'false'
kubelet_opts['node-ip'] = ingress_ip
if (dns['enable-kube-dns']):
kubelet_opts['cluster-dns'] = dns['sdn-ip']
privileged = is_state('kubernetes-worker.privileged')
kubelet_opts['allow-privileged'] = 'true' if privileged else 'false'
@ -548,6 +631,7 @@ def configure_kube_proxy(api_servers, cluster_cidr):
kube_proxy_opts['logtostderr'] = 'true'
kube_proxy_opts['v'] = '0'
kube_proxy_opts['master'] = random.choice(api_servers)
kube_proxy_opts['hostname-override'] = get_node_name()
if b'lxc' in check_output('virt-what', shell=True):
kube_proxy_opts['conntrack-max-per-core'] = '0'
@ -599,12 +683,32 @@ def create_kubeconfig(kubeconfig, server, ca, key=None, certificate=None,
check_call(split(cmd.format(kubeconfig, context)))
@when_any('config.changed.default-backend-image',
'config.changed.nginx-image')
@when('kubernetes-worker.config.created')
def launch_default_ingress_controller():
''' Launch the Kubernetes ingress controller & default backend (404) '''
config = hookenv.config()
# need to test this in case we get in
# here from a config change to the image
if not config.get('ingress'):
return
context = {}
context['arch'] = arch()
addon_path = '/root/cdk/addons/{}'
context['defaultbackend_image'] = config.get('default-backend-image')
if (context['defaultbackend_image'] == "" or
context['defaultbackend_image'] == "auto"):
if context['arch'] == 's390x':
context['defaultbackend_image'] = \
"k8s.gcr.io/defaultbackend-s390x:1.4"
else:
context['defaultbackend_image'] = \
"k8s.gcr.io/defaultbackend:1.4"
# Render the default http backend (404) replicationcontroller manifest
manifest = addon_path.format('default-http-backend.yaml')
render('default-http-backend.yaml', manifest, context)
@ -618,15 +722,19 @@ def launch_default_ingress_controller():
hookenv.close_port(443)
return
# Render the ingress replication controller manifest
context['ingress_image'] = \
"gcr.io/google_containers/nginx-ingress-controller:0.9.0-beta.13"
if arch() == 's390x':
context['ingress_image'] = \
"docker.io/cdkbot/nginx-ingress-controller-s390x:0.9.0-beta.13"
manifest = addon_path.format('ingress-replication-controller.yaml')
render('ingress-replication-controller.yaml', manifest, context)
hookenv.log('Creating the ingress replication controller.')
# Render the ingress daemon set controller manifest
context['ingress_image'] = config.get('nginx-image')
if context['ingress_image'] == "" or context['ingress_image'] == "auto":
if context['arch'] == 's390x':
context['ingress_image'] = \
"docker.io/cdkbot/nginx-ingress-controller-s390x:0.9.0-beta.13"
else:
context['ingress_image'] = \
"k8s.gcr.io/nginx-ingress-controller:0.9.0-beta.15" # noqa
context['juju_application'] = hookenv.service_name()
manifest = addon_path.format('ingress-daemon-set.yaml')
render('ingress-daemon-set.yaml', manifest, context)
hookenv.log('Creating the ingress daemon set.')
try:
kubectl('apply', '-f', manifest)
except CalledProcessError as e:
@ -670,7 +778,7 @@ def kubectl(*args):
def kubectl_success(*args):
''' Runs kubectl with the given args. Returns True if succesful, False if
''' Runs kubectl with the given args. Returns True if successful, False if
not. '''
try:
kubectl(*args)
@ -742,7 +850,7 @@ def set_privileged():
"""Update the allow-privileged flag for kubelet.
"""
privileged = hookenv.config('allow-privileged')
privileged = hookenv.config('allow-privileged').lower()
if privileged == 'auto':
gpu_enabled = is_state('kubernetes-worker.gpu.enabled')
privileged = 'true' if gpu_enabled else 'false'
@ -790,8 +898,8 @@ def enable_gpu():
return
# Apply node labels
_apply_node_label('gpu=true', overwrite=True)
_apply_node_label('cuda=true', overwrite=True)
set_label('gpu', 'true')
set_label('cuda', 'true')
set_state('kubernetes-worker.gpu.enabled')
set_state('kubernetes-worker.restart-needed')
@ -811,8 +919,8 @@ def disable_gpu():
hookenv.log('Disabling gpu mode')
# Remove node labels
_apply_node_label('gpu', delete=True)
_apply_node_label('cuda', delete=True)
remove_label('gpu')
remove_label('cuda')
remove_state('kubernetes-worker.gpu.enabled')
set_state('kubernetes-worker.restart-needed')
@ -844,24 +952,23 @@ def request_kubelet_and_proxy_credentials(kube_control):
# The kube-control interface is created to support RBAC.
# At this point we might as well do the right thing and return the hostname
# even if it will only be used when we enable RBAC
nodeuser = 'system:node:{}'.format(gethostname())
nodeuser = 'system:node:{}'.format(get_node_name().lower())
kube_control.set_auth_request(nodeuser)
@when('kube-control.connected')
def catch_change_in_creds(kube_control):
"""Request a service restart in case credential updates were detected."""
nodeuser = 'system:node:{}'.format(gethostname())
nodeuser = 'system:node:{}'.format(get_node_name().lower())
creds = kube_control.get_auth_credentials(nodeuser)
if creds \
and data_changed('kube-control.creds', creds) \
and creds['user'] == nodeuser:
if creds and creds['user'] == nodeuser:
# We need to cache the credentials here because if the
# master changes (master leader dies and replaced by a new one)
# the new master will have no recollection of our certs.
db.set('credentials', creds)
set_state('worker.auth.bootstrapped')
set_state('kubernetes-worker.restart-needed')
if data_changed('kube-control.creds', creds):
set_state('kubernetes-worker.restart-needed')
@when_not('kube-control.connected')
@ -899,47 +1006,46 @@ def _systemctl_is_active(application):
return False
def get_node_name():
kubelet_extra_args = parse_extra_args('kubelet-extra-args')
cloud_provider = kubelet_extra_args.get('cloud-provider', '')
if cloud_provider == 'aws':
return getfqdn()
else:
return gethostname()
class ApplyNodeLabelFailed(Exception):
pass
def _apply_node_label(label, delete=False, overwrite=False):
''' Invoke kubectl to apply node label changes '''
hostname = gethostname()
# TODO: Make this part of the kubectl calls instead of a special string
cmd_base = 'kubectl --kubeconfig={0} label node {1} {2}'
if delete is True:
label_key = label.split('=')[0]
cmd = cmd_base.format(kubeconfig_path, hostname, label_key)
cmd = cmd + '-'
else:
cmd = cmd_base.format(kubeconfig_path, hostname, label)
if overwrite:
cmd = '{} --overwrite'.format(cmd)
cmd = cmd.split()
deadline = time.time() + 60
def persistent_call(cmd, retry_message):
deadline = time.time() + 180
while time.time() < deadline:
code = subprocess.call(cmd)
if code == 0:
break
hookenv.log('Failed to apply label %s, exit code %d. Will retry.' % (
label, code))
return True
hookenv.log(retry_message)
time.sleep(1)
else:
msg = 'Failed to apply label %s' % label
raise ApplyNodeLabelFailed(msg)
return False
def _parse_labels(labels):
''' Parse labels from a key=value string separated by space.'''
label_array = labels.split(' ')
sanitized_labels = []
for item in label_array:
if '=' in item:
sanitized_labels.append(item)
else:
hookenv.log('Skipping malformed option: {}'.format(item))
return sanitized_labels
def set_label(label, value):
nodename = get_node_name()
cmd = 'kubectl --kubeconfig={0} label node {1} {2}={3} --overwrite'
cmd = cmd.format(kubeconfig_path, nodename, label, value)
cmd = cmd.split()
retry = 'Failed to apply label %s=%s. Will retry.' % (label, value)
if not persistent_call(cmd, retry):
raise ApplyNodeLabelFailed(retry)
def remove_label(label):
nodename = get_node_name()
cmd = 'kubectl --kubeconfig={0} label node {1} {2}-'
cmd = cmd.format(kubeconfig_path, nodename, label)
cmd = cmd.split()
retry = 'Failed to remove label {0}. Will retry.'.format(label)
if not persistent_call(cmd, retry):
raise ApplyNodeLabelFailed(retry)

View File

@ -14,10 +14,10 @@ spec:
terminationGracePeriodSeconds: 60
containers:
- name: default-http-backend
# Any image is permissable as long as:
# Any image is permissible as long as:
# 1. It serves a 404 page at /
# 2. It serves 200 on a /healthz endpoint
image: gcr.io/google_containers/defaultbackend:1.0
image: {{ defaultbackend_image }}
livenessProbe:
httpGet:
path: /healthz

View File

@ -1,12 +1,12 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: nginx-ingress-serviceaccount
name: nginx-ingress-{{ juju_application }}-serviceaccount
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: nginx-ingress-clusterrole
name: nginx-ingress-{{ juju_application }}-clusterrole
rules:
- apiGroups:
- ""
@ -58,7 +58,7 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: nginx-ingress-role
name: nginx-ingress-{{ juju_application }}-role
rules:
- apiGroups:
- ""
@ -100,57 +100,58 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: nginx-ingress-role-nisa-binding
name: nginx-ingress-role-nisa-{{ juju_application }}-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: nginx-ingress-role
name: nginx-ingress-{{ juju_application }}-role
subjects:
- kind: ServiceAccount
name: nginx-ingress-serviceaccount
name: nginx-ingress-{{ juju_application }}-serviceaccount
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: nginx-ingress-clusterrole-nisa-binding
name: nginx-ingress-clusterrole-nisa-{{ juju_application }}-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: nginx-ingress-clusterrole
name: nginx-ingress-{{ juju_application }}-clusterrole
subjects:
- kind: ServiceAccount
name: nginx-ingress-serviceaccount
name: nginx-ingress-{{ juju_application }}-serviceaccount
namespace: default
---
apiVersion: v1
kind: ConfigMap
metadata:
name: nginx-load-balancer-conf
name: nginx-load-balancer-{{ juju_application }}-conf
---
apiVersion: v1
kind: ReplicationController
apiVersion: apps/v1beta2
kind: DaemonSet
metadata:
name: nginx-ingress-controller
name: nginx-ingress-{{ juju_application }}-controller
labels:
k8s-app: nginx-ingress-lb
juju-application: nginx-ingress-{{ juju_application }}
spec:
replicas: 1
selector:
k8s-app: nginx-ingress-lb
matchLabels:
name: nginx-ingress-{{ juju_application }}
template:
metadata:
labels:
k8s-app: nginx-ingress-lb
name: nginx-ingress-lb
name: nginx-ingress-{{ juju_application }}
spec:
nodeSelector:
juju-application: {{ juju_application }}
terminationGracePeriodSeconds: 60
# hostPort doesn't work with CNI, so we have to use hostNetwork instead
# see https://github.com/kubernetes/kubernetes/issues/23920
hostNetwork: true
serviceAccountName: nginx-ingress-serviceaccount
serviceAccountName: nginx-ingress-{{ juju_application }}-serviceaccount
containers:
- image: {{ ingress_image }}
name: nginx-ingress-lb
name: nginx-ingress-{{ juju_application }}
imagePullPolicy: Always
livenessProbe:
httpGet: