root_user:
  password_hash: {{ password "root" "sha512crypt" }}
  authorized_keys:
  {{- range .vars.ssh_keys }}
  - "{{ . }}"
  {{- end }}
  {{- if .vars.additional_ssh_keys }}
  {{- range .vars.additional_ssh_keys }}
  - "{{ . }}"
  {{- end }}
  {{- end }}

layers: # it's TOP to bottom
- kubernetes{{if .vars.cri_o}}-crio{{end}}
- init
- modules
- system

{{ if .vars.modules -}}
modules:
{{- range .vars.modules }}
- {{ . }}
{{- end }}
{{- end }}

mounts:
- dev: /dev/{{ if .vars.encrypt_disks }}mapper{{ else }}storage{{ end }}/varlog
  path: /var/log
- dev: /dev/{{ if .vars.encrypt_disks }}mapper{{ else }}storage{{ end }}/kubelet
  path: /var/lib/kubelet
{{- if .vars.cri_o }}
- dev: /dev/{{ if .vars.encrypt_disks }}mapper{{ else }}storage{{ end }}/crio
  path: /var/lib/crio
{{- else }}
- dev: /dev/{{ if .vars.encrypt_disks }}mapper{{ else }}storage{{ end }}/containerd
  path: /var/lib/containerd
{{- end }}
{{ if .vars.is_master }}
- dev: /dev/{{ if .vars.encrypt_disks }}mapper{{ else }}storage{{ end }}/etcd
  path: /var/lib/etcd
{{ end }}

{{ if and .vars.novit .vars.novit.vpn .vars.novit.vpn.site_prefix .vars.novit.vpn.ip_suffix }}
vpns:
- name: wgnovit
  port: 39519
  ips:
  - fd6e:7674:{{ .vars.novit.vpn.site_prefix }}:{{ .vars.novit.vpn.ip_suffix }}/32
  peers:
  - public_key: hVK5qiuJBqKtankcMI8MZtDNfI3h4U/nk2qMRfw35UE=
    endpoint:
      ip: 141.95.173.143
      port: 39519
    keepalive: 30s
    allowed_ips:
    - fd6e:7674::/80
{{ end }}

files:
{{- define "rc_dkl" }}
- path: "/etc/runlevels/default/dkl-{{.}}"
  symlink: "../../init.d/dkl-{{.}}"
- path: "/etc/init.d/dkl-{{.}}"
  mode: 0o755
  content: |
    #! /sbin/openrc-run
    description="dkl: {{.}}"
    supervisor=supervise-daemon
    respawn_delay=8
    respawn_max=0
    command="/sbin/dkl logger /etc/direktil/services/{{.}}"
    depend() { after local ; }
{{- end }}

- path: /etc/systemd/system/dkl.service
  content: |
    [Service]
    {{- if not .vars.disable_secure_routes }}
    ExecStartPre=/etc/local.d/secure-routes.start
    {{- end }}
    ExecStartPre=/sbin/dkl dynlay kubernetes {{ .host.versions.kubernetes }}
    ExecStart=/sbin/dkl init services
    Restart=always
    RestartSec=10
    {{- if .vars.proxy }}
    Environment=HTTP_PROXY={{.vars.proxy}} HTTPS_PROXY={{.vars.proxy}} NO_PROXY="{{.vars.no_proxy}}"
    {{- end }}

    [Unit]
    After=network-online.target

    [Install]
    WantedBy=multi-user.target

- path: /etc/machine-id
  content: |
    {{ machine_id }}

- path: /etc/inittab
  content: |
    id:3:initdefault:
    si::sysinit:/sbin/openrc sysinit
    rc::bootwait:/sbin/openrc boot
    l0u:0:wait:/sbin/telinit u
    l0:0:wait:/sbin/openrc shutdown
    l0s:0:wait:/sbin/halt -dhnp
    l1:1:wait:/sbin/openrc single
    l2:2:wait:/sbin/openrc nonetwork
    l3:3:wait:/sbin/openrc default
    l4:4:wait:/sbin/openrc default
    l5:5:wait:/sbin/openrc default
    l6u:6:wait:/sbin/telinit u
    l6:6:wait:/sbin/openrc reboot
    l6r:6:wait:/sbin/reboot -dkn
    su0:S:wait:/sbin/openrc single
    su1:S:wait:/sbin/sulogin
    c1:12345:respawn:/sbin/agetty --noclear 38400 tty1 linux
    c2:2345:respawn:/sbin/agetty 38400 tty2 linux
    {{- if .vars.enable_serial_console }}
    s0:12345:respawn:/sbin/agetty --noclear -L 115200 ttyS0 vt100
    {{- end }}
    ca:12345:ctrlaltdel:/sbin/shutdown -r now

- path: /etc/rc.conf
  content: |
    rc_shell=/sbin/sulogin
    rc_logger="YES"
    #rc_log_path="/var/log/rc.log"
    unicode="YES"
    rc_tty_number=12
    rc_need="!net"

- path: /etc/conf.d/netmount
  content: ""

- path: /etc/hostname
  content: "{{host_name}}\n"

- path: /etc/hosts
  content: |
    127.0.0.1 localhost {{host_name}}{{ if not .vars.public_vip }} kubernetes{{end}}
    ::1 localhost {{host_name}}{{ if not .vars.public_vip }} kubernetes{{end}}
    {{ if .vars.public_vip }}
    {{ .vars.public_vip }} kubernetes
    {{- else }}
    {{ range shuffled_hosts_by_group .vars.master_group }}
    {{ .ip }} kubernetes
    {{- end }}
    {{ end }}
    {{ with .vars.additional_hosts }}
    {{- range . }}
    {{ . }}
    {{- end }}
    {{ end }}
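# NOTE: "kubernetes" resolves to the public VIP when one is set; otherwise the
# hosts of master_group are listed in per-host shuffled order, spreading API
# traffic. Rendered sketch with hypothetical values (no public_vip, masters
# 10.0.0.11/10.0.0.12, host "node-1"):
#   127.0.0.1 localhost node-1 kubernetes
#   ::1 localhost node-1 kubernetes
#   10.0.0.12 kubernetes
#   10.0.0.11 kubernetes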
- path: /etc/resolv.conf
  content: |
    {{ if .vars.dns -}}
    nameserver {{ .vars.dns }}
    {{ end -}}
    {{ if .vars.dnses -}}
    {{ range .vars.dnses }}
    nameserver {{ . }}
    {{ end -}}
    {{- end }}

- path: /etc/sysctl.conf
  content: |
    fs.file-max = 20971520
    fs.inotify.max_user_watches = 1048576
    kernel.pid_max = 1048576
    net.ipv4.ip_forward = 1
    vm.max_map_count = 262144
    net.ipv4.neigh.default.gc_thresh1 = 16384
    net.ipv4.neigh.default.gc_thresh2 = 28672
    net.ipv4.neigh.default.gc_thresh3 = 32768
    {{ if .vars.enable_mtu_probing -}}
    net.ipv4.tcp_mtu_probing = 2
    {{- end }}

- path: /etc/udev/rules.d/50-io-scheduler.rules
  content: |
    ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/scheduler}="bfq", ATTR{queue/nr_requests}="1024"
    ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/scheduler}="bfq", ATTR{queue/nr_requests}="2048"

{{ with .vars.additional_certs }}
- path: /usr/local/share/ca-certificates/novit.crt
  content: |
    {{ . | indent "    " }}
{{ end }}

# -------------------------------------------------------------------------
{{ ssh_user_ca "/etc/ssh/user_ca.pub" }}
{{ ssh_host_keys "/etc/ssh" }}

- path: /etc/ssh/sshd_config
  mode: 0o600
  content: |
    TrustedUserCAKeys /etc/ssh/user_ca.pub
    Include "/etc/ssh/sshd_config.d/*.conf"

# ------------------------------------------------------------------------
{{ if .vars.is_master }}
# certificates for etcd servers
{{ tls_dir "etcd-server" }}
{{ tls_dir "etcd-peer" }}

# certificates for etcd clients
{{ tls_dir "etcd-client" }}

# cluster certificates
{{ ca_dir "cluster" }}
{{ ca_dir "service-accounts" }}
{{ tls_dir "apiserver" }}
{{ tls_dir "kubelet-client" }}
{{ tls_dir "proxy-client" }}
{{ end }}

{{ tls_dir "cluster-client" }}

{{ if .vars.is_master -}}
- path: /etc/kubernetes/token-auth.csv
  mode: 0o600
  content: |
    {{ token "bootstrap" }},kubelet-bootstrap,10001,"system:bootstrappers"
    {{ token "admin" }},admin-token,10002,"system:masters"
{{- end }}

# ------------------------------------------------------------------------
- path: /etc/runlevels/default/chrony
  symlink: ../../init.d/chrony

- path: /etc/chrony/chrony.conf
  mode: 0o644
  content: |
    {{ if .vars.ntp_servers -}}
    {{ range .vars.ntp_servers -}}
    server {{ . }} iburst
    {{ end -}}
    {{ else -}}
    server 0.gentoo.pool.ntp.org iburst
    server 1.gentoo.pool.ntp.org iburst
    server 2.gentoo.pool.ntp.org iburst
    server 3.gentoo.pool.ntp.org iburst
    {{- end }}
    driftfile /var/lib/chrony/drift
    makestep 1.0 3
    rtcsync
    {{ if .vars.ntp_server_mode -}}
    allow all
    {{- end }}

{{ if .vars.is_vmware -}}
# ------------------------------------------------------------------------
{{ template "rc_dkl" "vmtoolsd" }}
- path: /etc/direktil/services/vmtoolsd
  mode: 0o755
  content: |
    #! /bin/sh
    # ---
    # restart: 3
    set -ex
    exec /usr/bin/vmtoolsd
{{- end }}

{{ if .vars.novit_host_token }}
# ------------------------------------------------------------------------
{{ template "rc_dkl" "novit-connect" }}
- path: /etc/direktil/services/novit-connect
  mode: 0o755
  content: |
    #! /bin/sh
    {{ if .vars.proxy -}}
    export HTTP_PROXY={{.vars.proxy}}
    export HTTPS_PROXY="$HTTP_PROXY"
    export NO_PROXY="{{.vars.no_proxy}}"
    {{- end }}
    exec /bin/novit-connect -token {{.vars.novit_host_token}}
{{ end }}

# ------------------------------------------------------------------------
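# k8s-local-volumes (below) polls every 10s and symlinks any
# /dev/storage/k8s-pv-*, /dev/k8s-pv/* or /dev/mapper/k8s-pv-* device into
# /mnt, so the volumes can be consumed as Kubernetes local persistent volumes.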
{{ template "rc_dkl" "k8s-local-volumes" }}
- path: /etc/direktil/services/k8s-local-volumes
  mode: 0o755
  content: |
    #! /bin/bash
    # ---
    # restart: 3
    while true
    do
        for dev in /dev/storage/k8s-pv-*
        do
            [ -e $dev ] || continue
            tgt=${dev/dev/mnt}
            [ -e $tgt ] || {
                mkdir -p $(dirname $tgt)
                ln -s $dev $tgt
            }
        done
        for dev in /dev/k8s-pv/*
        do
            [ -e $dev ] || continue
            tgt=/mnt/storage/k8s-pv__$(basename $dev)
            [ -e $tgt ] || {
                mkdir -p $(dirname $tgt)
                ln -s $dev $tgt
            }
        done
        for dev in /dev/mapper/k8s-pv-*
        do
            [ -e $dev ] || continue
            tgt=/mnt/storage/mapper__$(basename $dev)
            [ -e $tgt ] || {
                mkdir -p $(dirname $tgt)
                ln -s $dev $tgt
            }
        done
        sleep 10
    done

# ------------------------------------------------------------------------
{{- $cr := "containerd" }}{{ if .vars.cri_o }}{{ $cr = "crio" }}{{end}}
{{ template "rc_dkl" $cr }}
- path: /etc/direktil/services/{{$cr}}
  mode: 0o755
  content: |
    #! /bin/bash
    set -ex

    cg=cpu,memory
    cgcreate -g $cg:podruntime
    cgclassify -g $cg:podruntime $$

    ulimit -n 1048576
    ulimit -u unlimited
    ulimit -c unlimited

    {{ if .vars.proxy -}}
    export HTTP_PROXY={{.vars.proxy}}
    export HTTPS_PROXY="$HTTP_PROXY"
    export NO_PROXY="{{.vars.no_proxy}}"
    {{- end }}

    {{- if .vars.cri_o }}
    exec /usr/bin/crio --root=/var/lib/crio
    {{- else }}
    exec /usr/bin/containerd --log-level info
    {{- end }}

# ------------------------------------------------------------------------
{{ template "rc_dkl" "kubelet" }}
- path: /etc/direktil/services/kubelet
  mode: 0o755
  content: |
    #! /bin/sh
    set -ex

    cg=cpu,memory
    cgcreate -g $cg:podruntime
    cgclassify -g $cg:podruntime $$

    ctr_sock="{{if .vars.cri_o}}/run/crio/crio.sock{{else}}/run/containerd/containerd.sock{{end}}"
    echo "waiting for $ctr_sock"
    while ! [ -e $ctr_sock ]; do sleep 1; done

    #ulimit -n 1048576
    mkdir -p /var/lib/kubelet/manifests

    exec /usr/bin/kubelet \
      --config=/etc/kubernetes/kubelet.yaml \
      {{- if .vars.hostname_override }}
      --hostname-override={{.vars.hostname_override}} \
      {{- else }}
      --hostname-override={{host_name}} \
      {{- end }}
      {{- range $k, $v := .labels }}
      --node-labels={{ $k }}={{ $v }} \
      {{- end }}
      --bootstrap-kubeconfig=/etc/kubernetes/bootstrap.kubeconfig \
      --kubeconfig=/var/lib/kubelet/kubeconfig \
      --node-ip={{ default .vars.node_ip host_ip }}

{{ if .vars.enable_nfs }}
# -------------------------------------------------------------------------
- path: /etc/runlevels/default/rpcbind
  symlink: ../../init.d/rpcbind
- path: /etc/runlevels/default/rpc.statd
  symlink: ../../init.d/rpc.statd
{{ end }}
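# Each entry of vars.nfs below yields a keeper service that re-mounts its
# src on dst whenever the mount disappears, re-checking every 60 seconds.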
{{ if .vars.enable_nfs }}
{{ range .vars.nfs }}
- path: /etc/runlevels/default/mount-nfs-{{ .name }}
  symlink: ../../init.d/dkl-svc
{{ template "rc_dkl" (print "mount-nfs-" .name) }}
- path: /etc/direktil/services/mount-nfs-{{ .name }}
  mode: 0o755
  content: |
    #! /bin/sh
    # ---
    # restart: 3
    while true
    do
        findmnt {{ .dst }} >/dev/null || {
            mkdir -p {{ .dst }}
            mount -t nfs -o {{ .opts }} {{ .src }} {{ .dst }}
        }
        sleep 60
    done
{{- end }}
{{- end }}

# -------------------------------------------------------------------------
- path: /etc/crictl.yaml
  content: |
    {{- if .vars.cri_o }}
    runtime-endpoint: "unix:///var/run/crio/crio.sock"
    {{- else }}
    runtime-endpoint: "unix:///run/containerd/containerd.sock"
    {{- end }}

{{- if .vars.cri_o }}
- path: /etc/crio/crio.conf.d/20-novit.conf
  mode: 0o640
  content: |
    [crio.runtime]
    cgroup_manager = "cgroupfs"
    conmon_cgroup = "pod"
{{- else }}
- path: /etc/containerd/config.toml
  mode: 0o600
  content: |
    version = 2
    root = "/var/lib/containerd"
    state = "/run/containerd"
    plugin_dir = ""
    disabled_plugins = []
    required_plugins = []
    oom_score = 0

    [grpc]
      address = "/run/containerd/containerd.sock"
      tcp_address = ""
      tcp_tls_cert = ""
      tcp_tls_key = ""
      uid = 0
      gid = 0
      max_recv_message_size = 16777216
      max_send_message_size = 16777216

    [ttrpc]
      address = ""
      uid = 0
      gid = 0

    [debug]
      address = ""
      uid = 0
      gid = 0
      level = ""

    [metrics]
      address = ""
      grpc_histogram = false

    [cgroup]
      path = ""

    [timeouts]
      "io.containerd.timeout.shim.cleanup" = "5s"
      "io.containerd.timeout.shim.load" = "5s"
      "io.containerd.timeout.shim.shutdown" = "3s"
      "io.containerd.timeout.task.state" = "2s"

    [plugins]
      [plugins."io.containerd.gc.v1.scheduler"]
        pause_threshold = 0.02
        deletion_threshold = 0
        mutation_threshold = 100
        schedule_delay = "0s"
        startup_delay = "100ms"
      [plugins."io.containerd.grpc.v1.cri"]
        disable_tcp_service = true
        stream_server_address = "127.0.0.1"
        stream_server_port = "0"
        stream_idle_timeout = "4h0m0s"
        enable_selinux = false
        sandbox_image = "registry.k8s.io/pause:3.1"
        stats_collect_period = 10
        systemd_cgroup = false
        enable_tls_streaming = false
        max_container_log_line_size = 16384
        disable_cgroup = false
        disable_apparmor = false
        restrict_oom_score_adj = false
        max_concurrent_downloads = 3
        disable_proc_mount = false
        [plugins."io.containerd.grpc.v1.cri".containerd]
          snapshotter = "overlayfs"
          default_runtime_name = "runc"
          no_pivot = false
          [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
            runtime_type = ""
            runtime_engine = ""
            runtime_root = ""
            privileged_without_host_devices = false
          [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
            runtime_type = ""
            runtime_engine = ""
            runtime_root = ""
            privileged_without_host_devices = false
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
            [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
              runtime_type = "io.containerd.runc.v1"
              runtime_engine = ""
              runtime_root = ""
              privileged_without_host_devices = false
        [plugins."io.containerd.grpc.v1.cri".cni]
          bin_dir = "/opt/cni/bin"
          conf_dir = "/etc/cni/net.d"
          max_conf_num = 1
          conf_template = ""
        [plugins."io.containerd.grpc.v1.cri".registry]
          [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
    {{ $mirror_cache := .vars.docker_registries_mirror_cache }}
    {{ $mirror_repos := .vars.docker_registries_mirror_repos }}
    {{ if $mirror_cache.enable }}
    {{- range $name, $path := $mirror_repos }}
          [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ $name }}"]
            endpoint = ["http://127.0.0.1:8585{{ $path }}"{{ if $mirror_cache.remote }}, "{{ $mirror_cache.remote }}{{ $path }}"{{ end }}]
    {{- end }}
    {{- end }}
    {{- range $name := .vars.http_registries }}
          [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ $name }}"]
            endpoint = ["http://{{ $name }}"]
    {{- end }}
        [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
          tls_cert_file = ""
          tls_key_file = ""
      [plugins."io.containerd.internal.v1.opt"]
        path = "/opt/containerd"
      [plugins."io.containerd.internal.v1.restart"]
        interval = "10s"
      [plugins."io.containerd.metadata.v1.bolt"]
        content_sharing_policy = "shared"
      [plugins."io.containerd.monitor.v1.cgroups"]
        no_prometheus = false
      [plugins."io.containerd.runtime.v1.linux"]
        shim = "containerd-shim"
        runtime = "runc"
        runtime_root = ""
        no_shim = false
        shim_debug = false
      [plugins."io.containerd.runtime.v2.task"]
        platforms = ["linux/amd64"]
      [plugins."io.containerd.service.v1.diff-service"]
        default = ["walking"]
      [plugins."io.containerd.snapshotter.v1.devmapper"]
        root_path = ""
        pool_name = ""
        base_image_size = ""
{{- end }}
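# Registry mirror rendering sketch, with hypothetical values
#   docker_registries_mirror_cache: { enable: true, remote: "https://cache.example.org" }
#   docker_registries_mirror_repos: { docker.io: /v2/docker.io }
# the mirrors section above would contain:
#   [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
#     endpoint = ["http://127.0.0.1:8585/v2/docker.io", "https://cache.example.org/v2/docker.io"]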
# -------------------------------------------------------------------------
{{ $podPidsLimit := 4096 -}}
- path: /etc/kubernetes/kubelet.yaml
  mode: 0o600
  content: |
    kind: KubeletConfiguration
    apiVersion: kubelet.config.k8s.io/v1beta1
    containerRuntimeEndpoint: "unix://{{if .vars.cri_o}}/run/crio/crio.sock{{else}}/run/containerd/containerd.sock{{end}}"
    staticPodPath: /var/lib/kubelet/manifests
    makeIPTablesUtilChains: {{ eq .vars.kube_proxy "proxy" }}
    clusterDomain: {{ .cluster.domain }}
    clusterDNS:
    - {{ .cluster.dns_svc_ip }}
    podCIDR: {{ .cluster.subnets.pods }}
    address: 0.0.0.0
    authentication:
      x509:
        clientCAFile: /etc/tls/cluster-client/ca.crt
      anonymous:
        enabled: false
    maxPods: {{ .vars.kubelet.maxPods }}
    serializeImagePulls: false
    featureGates: {}
    serverTLSBootstrap: true
    rotateCertificates: true
    podPidsLimit: {{ $podPidsLimit }}
    containerLogMaxFiles: 2
    containerLogMaxSize: 16Mi
    # cgroups configuration
    cgroupsPerQOS: true
    cgroupDriver: cgroupfs
    systemReservedCgroup: openrc
    systemReserved:
      cpu: "{{ .vars.system_reserved.cpu }}"
      memory: "{{ .vars.system_reserved.memory }}"
    kubeReservedCgroup: podruntime
    kubeReserved:
      cpu: "{{ .vars.kube_reserved.cpu }}"
      memory: "{{ .vars.kube_reserved.memory }}"
    #evictionHard:
    #  memory.available: 100Mi
    {{ .vars.extra_kubelet_config | indent "    " }}

- path: /etc/kubernetes/haproxy-api.cfg
  content: |
    defaults
        mode tcp
        timeout client 2s
        timeout connect 5s
        timeout server 2s
        timeout tunnel 1m

    frontend k8s-api
        bind 127.0.0.1:6444
        bind [::1]:6444
        mode tcp
        default_backend k8s-api

    backend k8s-api
        mode tcp
        option tcp-check
        balance random
        default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    {{- $apiPort := .vars.control_plane.api_port -}}
    {{- range $i, $host := hosts_by_group .vars.master_group }}
        server {{$host.name}}_0 {{$host.ip}}:{{ $apiPort }} check
    {{- end }}

    {{ if and .vars.docker_registries_mirror_cache.enable (not .vars.is_master) }}
    frontend dkr-reg-mirror
        bind 127.0.0.1:8585
        bind [::1]:8585
        mode tcp
        default_backend dkr-reg-mirror

    backend dkr-reg-mirror
        mode tcp
        option tcp-check
        balance random
        default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    {{- range $i, $host := hosts_by_group .vars.master_group }}
        server {{$host.name}}_0 {{$host.ip}}:8585 check
    {{- end }}
    {{- end }}

{{ tls_dir "node-bootstrap" }}

- path: /etc/kubernetes/bootstrap.kubeconfig
  mode: 0o600
  content: |
    apiVersion: v1
    kind: Config
    preferences: {}
    current-context: local
    clusters:
    - cluster:
        certificate-authority: /etc/tls/cluster-client/ca.crt
        server: https://[::1]:6444
      name: local
    contexts:
    - context:
        cluster: local
        user: kubelet-bootstrap
      name: local
    users:
    - name: kubelet-bootstrap
      user:
        client-key: /etc/tls/node-bootstrap/tls.key
        client-certificate: /etc/tls/node-bootstrap/tls.crt
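# Both kubeconfigs (bootstrap above, control-plane below) point at
# https://[::1]:6444, the local haproxy frontend, so API access keeps working
# as long as any master in master_group is reachable.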
- path: /etc/kubernetes/control-plane/kubeconfig
  mode: 0o600
  content: |
    apiVersion: v1
    kind: Config
    preferences: {}
    current-context: local
    clusters:
    - cluster:
        certificate-authority: /etc/tls/cluster-client/ca.crt
        server: https://[::1]:6444
      name: local
    contexts:
    - context:
        cluster: local
        user: control-plane
      name: local
    users:
    - name: control-plane
      user:
        token: {{ token "admin" }}

{{ if eq .vars.cloud_provider "vsphere" }}
- path: /etc/kubernetes/vsphere.conf
  mode: 0o600
  content: |
    [Global]
    vm-name = "{{host_name}}"
    user = "{{.vars.vsphere.user}}"
    password = "{{.vars.vsphere.password}}"
    insecure-flag = "1"
    datacenters = "{{.vars.vsphere.datacenter}}"

    [Workspace]
    server = "{{.vars.vsphere.server}}"
    datacenter = "{{.vars.vsphere.datacenter}}"
    folder = "{{.vars.vsphere.folder}}"
    default-datastore = "{{.vars.vsphere.datastore}}"

    [VirtualCenter "{{.vars.vsphere.server}}"]

    [Disk]
    scsicontrollertype = pvscsi
{{ end }}

{{ if .vars.is_master }}
{{ static_pods_files "/etc/kubernetes/manifests.bootstrap" }}
{{ else }}{{/* TODO merge with the bootstrap pod */}}
- path: /etc/kubernetes/manifests.bootstrap/api-haproxy.yaml
  content: |
    apiVersion: v1
    kind: Pod
    metadata:
      namespace: kube-system
      name: k8s-api-haproxy
      labels:
        component: k8s-api-haproxy
        tier: control-plane
    spec:
      hostNetwork: true
      priorityClassName: system-node-critical
      automountServiceAccountToken: false
      tolerations:
      - key: node.kubernetes.io/not-ready
        effect: NoSchedule
      containers:
      - name: api-haproxy
        image: haproxy:2.4.25-alpine
        resources:
          requests:
            cpu: 10m
            memory: 16Mi
        volumeMounts:
        - name: config
          mountPath: /usr/local/etc/haproxy/haproxy.cfg
          readOnly: true
      volumes:
      - name: config
        hostPath:
          type: File
          path: /etc/kubernetes/haproxy-api.cfg
{{ end }}

- path: /etc/cron.monthly/clean-archives-logs
  mode: 0o755
  content: |
    #! /bin/bash
    find /var/log/archives/ -type f -mtime +20 -delete

- path: /etc/local.d/blockdevs.start
  mode: 0o755
  content: |
    #! /bin/bash
    for d in /sys/class/block/*/device; do
        d=${d%/device}
        [ $(<$d/queue/nr_requests) -ge 1024 ] || echo 1024 >$d/queue/nr_requests
        [ $(<$d/queue/rotational) -eq 0 ] || echo kyber >$d/queue/scheduler
    done

{{ if not .vars.disable_secure_routes }}
- path: /etc/local.d/secure-routes.start
  mode: 0o755
  content: |
    #! /bin/bash
    set -ex
    if ip li add nvdummy type dummy
    then
        ip li set nvdummy up
        ip route add {{.cluster.subnets.services}} dev nvdummy
    fi
    for prefix in 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 ; do
        ip route list $prefix |grep -q . || ip route add unreachable $prefix
    done
    for prefix in fd00::/8 ; do
        ip -6 route list $prefix |grep -q . || ip -6 route add unreachable $prefix
    done
{{ end }}

- path: /root/host-checks.sh
  mode: 0o700
  content: |
    #! /bin/bash
    {{ if .vars.is_master }}
    echo "kubelet health:"
    curl --cacert /etc/tls/kubelet-client/ca.crt \
        --cert /etc/tls/kubelet-client/tls.crt \
        --key /etc/tls/kubelet-client/tls.key \
        https://{{host_name}}:10250/healthz
    echo

    echo "API health:"
    curl --cacert /etc/tls/apiserver/ca.crt \
        --cert /etc/tls/cluster-client/tls.crt \
        --key /etc/tls/cluster-client/tls.key \
        https://127.0.0.1:{{ .vars.control_plane.api_port }}/healthz
    echo
    {{ end }}
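# host-checks.sh is meant for manual verification after provisioning, e.g.:
#   ssh root@<host> /root/host-checks.sh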