use eyre::{format_err, Result}; use log::{debug, info, warn}; use std::path::{Path, PathBuf}; use tokio::{ fs, io::{AsyncBufReadExt, AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader}, }; use dkl::{ self, apply::{self, chroot, set_perms}, base64_decode, bootstrap::Config, }; use super::{exec, mount, retry, retry_or_ignore, try_exec, try_exec_cmd}; use crate::{fs::walk_dir, utils}; pub async fn bootstrap(cfg: Config) { let verifier = retry(async || Verifier::from_config(&cfg)).await; let bs = &cfg.bootstrap; mount(Some(&bs.dev), "/bootstrap", "ext4", None).await; // VPNs for vpn_conf in walk_dir("/bootstrap/vpns").await { if !vpn_conf.ends_with(".conf") { continue; } retry_or_ignore(async || { info!("starting VPN from {vpn_conf}"); try_exec("wg-quick", &["up", &vpn_conf]).await }) .await; } // prepare system let boot_version = utils::param("version").unwrap_or("current"); let base_dir = &format!("/bootstrap/{boot_version}"); retry_or_ignore(async || { if !fs::try_exists(&base_dir).await? { info!("creating {base_dir}"); fs::create_dir_all(&base_dir).await? } Ok(()) }) .await; let sys_cfg: dkl::Config = retry(async || { let sys_cfg_bytes = seed_config(base_dir, &bs, &verifier).await?; Ok(serde_yaml::from_slice(&sys_cfg_bytes)?) }) .await; mount_system(&sys_cfg, &cfg, base_dir, &verifier).await; retry_or_ignore(async || { let path = "/etc/resolv.conf"; if fs::try_exists(path).await? { info!("cp /etc/resolv.conf"); fs::copy(path, &format!("/system{path}")).await?; } Ok(()) }) .await; retry_or_ignore(async || apply::files(&sys_cfg.files, "/system", false).await).await; apply_groups(&sys_cfg.groups, "/system").await; apply_users(&sys_cfg.users, "/system").await; mount_filesystems(&sys_cfg.mounts, "/system").await; retry_or_ignore(async || { info!("setting up root user"); setup_root_user(&sys_cfg.root_user, "/system").await }) .await; exec("chroot", &["/system", "update-ca-certificates"]).await; // activate ttyS* consoles as needed retry_or_ignore(async || { const PATH: &str = "/system/etc/inittab"; let mut inittab = fs::read_to_string(PATH).await?; let mut changed = false; for opt in utils::cmdline().filter_map(|s| s.strip_prefix("console=ttyS")) { info!("inittab: adding entry for ttyS{opt}"); changed = true; let mut params = opt.split(','); let num = params.next().unwrap(); let speed = params.next().unwrap_or("115200"); inittab.push_str(&format!( "S{num}:12345:respawn:/sbin/agetty --noclear {speed} ttyS{num} linux\n" )); } if changed { fs::write(PATH, inittab.as_bytes()).await?; } Ok(()) }) .await; } struct Verifier { pubkey: Option>, } impl Verifier { fn from_config(cfg: &Config) -> Result { let Some(ref pubkey) = cfg.signer_public_key else { return Ok(Self { pubkey: None }); }; let pubkey = base64_decode(pubkey)?; let pubkey = Some(pubkey); return Ok(Self { pubkey }); } async fn verify_path(&self, path: impl AsRef) -> Result> { let path = path.as_ref(); let p = path.display(); let data = (fs::read(path).await).map_err(|e| format_err!("failed to read {p}: {e}"))?; let Some(ref pubkey) = self.pubkey else { return Ok(data); }; info!("verifying {p}"); let sig = path.with_added_extension("sig"); let sig = (fs::read(&sig).await) .map_err(|e| format_err!("failed to read {}: {e}", sig.display()))?; use openssl::{hash::MessageDigest, pkey::PKey, sign::Verifier}; let pubkey = PKey::public_key_from_der(pubkey)?; let sig_ok = Verifier::new(MessageDigest::sha512(), &pubkey)? .verify_oneshot(&sig, &data) .map_err(|e| format_err!("verify failed: {e}"))?; if sig_ok { Ok(data) } else { Err(format_err!("signature verification failed for {p}")) } } } async fn seed_config( base_dir: &str, bs: &dkl::bootstrap::Bootstrap, verifier: &Verifier, ) -> Result> { let cfg_path = &format!("{base_dir}/config.yaml"); if fs::try_exists(cfg_path).await? { return Ok(fs::read(cfg_path).await?); } let bs_tar = "/bootstrap.tar"; if !fs::try_exists(bs_tar).await? { if bs.seed.is_none() { return Err(format_err!( "no {cfg_path}, no {bs_tar} and no seed URL, can't bootstrap" )); } fetch_bootstrap(bs, bs_tar).await?; } try_exec("tar", &["xf", bs_tar, "-C", base_dir]).await?; if !fs::try_exists(cfg_path).await? { return Err(format_err!("{cfg_path} does not exist after seeding")); } verifier.verify_path(&cfg_path).await } async fn fetch_bootstrap(bs: &dkl::bootstrap::Bootstrap, output_file: &str) -> Result<()> { let seed_url: reqwest::Url = (bs.seed.as_ref()) .ok_or(format_err!("no seed URL"))? .parse() .map_err(|e| format_err!("invalid seed URL: {e}"))?; info!( "fetching {output_file} from {}", seed_url.host_str().unwrap_or("") ); let mut builder = reqwest::Client::builder(); if let Some(ref proxy) = bs.seed_proxy { debug!("using proxy {proxy}"); let proxy = reqwest::Proxy::all(proxy) // .map_err(|e| format_err!("seed proxy setup failed: {e}"))?; builder = builder.proxy(proxy); } if let Some(ref ca) = bs.seed_ca { debug!("using custom CA certificate"); let ca = base64_decode(ca).map_err(|e| format_err!("invalid seed CA: decode: {e}"))?; let ca = reqwest::Certificate::from_der(&ca) .map_err(|e| format_err!("invalid seed CA: parse: {e}"))?; builder = builder.tls_certs_only([ca]); } if let Some(ref sn) = bs.seed_servername { debug!("tls server name: {sn}"); builder = builder.tls_server_name(bs.seed_servername.clone()); } let req = builder.build()?.get(seed_url); let resp = req.send().await?; if !resp.status().is_success() { return Err(format_err!("HTTP request failed: {}", resp.status())); } let data = (resp.bytes().await).map_err(|e| format_err!("HTTP download failed: {e}"))?; (fs::write(output_file, &data).await) .map_err(|e| format_err!("output file write failed: {e}"))?; Ok(()) } fn default_root_tmpfs_opts() -> Option { let mem = sys_info::mem_info() .inspect_err(|e| warn!("failed to get system memory info, using default tmpfs size: {e}")) .ok()?; let mem_size = mem.total /* kiB */ / 1024; let fs_size = 1024.min(mem_size / 2); info!("system has {mem_size} MiB of memory, allowing {fs_size} MiB for root tmpfs"); Some(format!("size={fs_size}m")) } struct LayerMounter<'t> { bs_dir: &'t str, layers_dir: &'t str, verifier: &'t Verifier, lower_dir: String, } impl LayerMounter<'_> { fn src_path(&self, name: &str) -> PathBuf { let mut p = PathBuf::from(self.bs_dir); p.push(name); if name != "merged" { p.add_extension("fs"); } p } async fn exists(&self, name: &str) -> bool { retry(async || Ok(fs::try_exists(self.src_path(name)).await?)).await } async fn mount(&mut self, name: &str) { self.mount_path(self.src_path(name), name, true).await } async fn mount_path(&mut self, src: impl AsRef, name: &str, verify: bool) { let src = src.as_ref(); let tgt_dir = PathBuf::from(self.layers_dir).join(name); let tgt = tgt_dir.with_added_extension("fs"); if let Err(e) = fs::create_dir_all(&tgt_dir).await { warn!("mkdir -p {}: {e}", tgt_dir.display()); } let mount_src = if name == "merged" { retry(async || { let data = self.verifier.verify_path(src).await?; let data = MergedLayer::from_bytes(&data) .ok_or(format_err!("{}: invalid data", src.display()))?; data.create(&tgt) .await .map_err(|e| format_err!("write {}: {e}", tgt.display()))?; let dm_name = &format!("system"); let mut cmd = tokio::process::Command::new("veritysetup"); cmd.arg("open") .arg(format!("--hash-offset={}", data.hash_offset())) .arg(&tgt) .arg(dm_name) .arg(&tgt) .arg(data.root_hash_hex()); try_exec_cmd(cmd).await?; Ok(PathBuf::from("/dev/mapper").join(dm_name)) }) .await } else { retry(async || { let src = if verify { self.verifier.verify_path(src).await? } else { fs::read(src).await? }; fs::write(&tgt, &src).await?; Ok(tgt.clone()) }) .await }; retry(async || { let mut buf = [0u8; 1028]; fs::File::open(&mount_src) .await .map_err(|e| format_err!("open {}: {e}", mount_src.display()))? .read_exact(&mut buf) .await .map_err(|e| format_err!("read {}: {e}", mount_src.display()))?; let fstype = if buf[1024..1028] == 0xE0F5E1E2u32.to_le_bytes() { "erofs" } else { "squashfs" }; mount(Some(&mount_src), &tgt_dir, fstype, None).await; Ok(()) }) .await; if !self.lower_dir.is_empty() { self.lower_dir.push(':'); } self.lower_dir.push_str(&tgt_dir.to_string_lossy()); } } async fn mount_system(cfg: &dkl::Config, bs_cfg: &Config, bs_dir: &str, verifier: &Verifier) { let opts = match utils::param("root-opts") { Some(s) => Some(s.to_string()), None => default_root_tmpfs_opts(), }; let mem_dir = "/mem"; mount(None::<&str>, mem_dir, "tmpfs", opts.as_deref()).await; let mut mounter = LayerMounter { bs_dir, layers_dir: &format!("{mem_dir}/layers"), verifier, lower_dir: String::new(), }; if mounter.exists("merged").await { mounter.mount("merged").await; } else { for layer in &cfg.layers { if layer == "modules" && bs_cfg.modules.is_some() { continue; // take modules from initrd } mounter.mount(layer).await; } } if let Some(ref modules) = bs_cfg.modules { mounter.mount_path(modules, "modules", false).await; } let upper_dir = &format!("{mem_dir}/upper"); let work_dir = &format!("{mem_dir}/work"); retry_or_ignore(async || { fs::create_dir_all(upper_dir).await?; fs::create_dir_all(work_dir).await?; Ok(()) }) .await; let lower_dir = &mounter.lower_dir; let opts = format!("lowerdir={lower_dir},upperdir={upper_dir},workdir={work_dir}"); mount(None::<&str>, "/system", "overlay", Some(&opts)).await; // make root rshared (default in systemd, required by Kubernetes 1.10+) // equivalent to "mount --make-rshared /" // see kernel's Documentation/sharedsubtree.txt (search rshared) retry_or_ignore(async || { use nix::mount::MsFlags as M; const NONE: Option<&str> = None; nix::mount::mount(NONE, "/system", NONE, M::MS_SHARED | M::MS_REC, NONE)?; Ok(()) }) .await; } struct MergedLayer<'t> { #[allow(unused)] root_hash_sig: &'t [u8], root_hash: &'t [u8], data: &'t [u8], hash: &'t [u8], } impl<'t> MergedLayer<'t> { fn from_bytes(mut src: &'t [u8]) -> Option { let mut next = || { let (len, rem) = src.split_at_checked(8)?; let len = u64::from_be_bytes(len.try_into().ok()?); let (data, rem) = rem.split_at_checked(len as usize)?; src = rem; Some(data) }; Some(Self { root_hash_sig: next()?, root_hash: next()?, data: next()?, hash: next()?, }) } async fn create(&self, path: impl AsRef) -> std::io::Result<()> { let mut out = fs::File::create(path).await?; self.write_to(&mut out).await?; out.shutdown().await } async fn write_to(&self, mut out: impl AsyncWrite + Unpin) -> std::io::Result<()> { out.write_all(self.data).await?; out.write_all(self.hash).await?; Ok(()) } fn hash_offset(&self) -> usize { self.data.len() } fn root_hash_hex(&self) -> String { hex::encode(self.root_hash) } } async fn apply_groups(groups: &[dkl::Group], root: &str) { for group in groups { let mut args = vec![root, "groupadd", "-r"]; let gid = group.gid.map(|s| s.to_string()); if let Some(gid) = gid.as_ref() { args.extend(&["-g", gid]); } args.push(group.name.as_str()); exec("chroot", &args).await; } } async fn apply_users(users: &[dkl::User], root: &str) { for user in users { let mut args = vec![root, "useradd", "-r"]; let uid = user.uid.map(|s| s.to_string()); if let Some(uid) = uid.as_ref() { args.extend(&["-u", uid]); } let gid = user.gid.map(|s| s.to_string()); if let Some(gid) = gid.as_ref() { args.extend(&["-g", gid]); } args.push(user.name.as_str()); exec("chroot", &args).await; } } async fn mount_filesystems(mounts: &[dkl::Mount], root: &str) { for m in mounts { let path = chroot(root, &m.path); mount( Some(&m.dev), &path, (m.r#type.as_deref()) .filter(|s| !s.is_empty()) .unwrap_or("ext4"), m.options.as_deref().filter(|v| !v.is_empty()), ) .await; } } async fn setup_root_user(user: &dkl::RootUser, root: &str) -> Result<()> { if let Some(pw_hash) = user.password_hash.as_ref().filter(|v| !v.is_empty()) { set_user_password("root", &pw_hash, root).await?; } let mut authorized_keys = Vec::new(); for ak in &user.authorized_keys { authorized_keys.extend(ak.as_bytes()); authorized_keys.push(b'\n'); } let ssh_dir = &chroot(root, "root/.ssh"); fs::create_dir_all(ssh_dir) .await .map_err(|e| format_err!("mkdir -p {ssh_dir} failed: {e}"))?; set_perms(ssh_dir, Some(0o700)) .await .map_err(|e| format_err!("chmod {ssh_dir} failed: {e}"))?; let ak_path = &format!("{ssh_dir}/authorized_keys"); fs::write(ak_path, authorized_keys) .await .map_err(|e| format_err!("write {ak_path} failed: {e}"))?; Ok(()) } async fn set_user_password(user: &str, password_hash: &str, root: &str) -> Result<()> { info!("setting password for {user}"); let user = user.as_bytes(); let password_hash = password_hash.as_bytes(); let mut buf = Vec::new(); let pw_file = &chroot(root, "etc/shadow"); let rd = fs::File::open(pw_file) .await .map_err(|e| format_err!("open {pw_file} failed: {e}"))?; let mut rd = BufReader::new(rd); let mut line = Vec::new(); while (rd.read_until(b'\n', &mut line).await) .map_err(|e| format_err!("read {pw_file} failed: {e}"))? != 0 { let mut split: Vec<_> = line.split(|c| *c == b':').collect(); if split.len() > 2 && split[0] == user { split[1] = password_hash; buf.extend(split.join(&b':')); } else { buf.extend(&line); } line.clear(); } fs::write(pw_file, buf).await?; Ok(()) }