Files
initrd/src/cmd/init/bootstrap.rs
T
2026-05-08 11:56:45 +02:00

556 lines
16 KiB
Rust

use eyre::{format_err, Result};
use log::{debug, info, warn};
use std::path::{Path, PathBuf};
use tokio::{
fs,
io::{AsyncBufReadExt, AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader},
};
use dkl::{
self,
apply::{self, chroot, set_perms},
base64_decode,
bootstrap::Config,
};
use super::{exec, mount, retry, retry_or_ignore, try_exec, try_exec_cmd};
use crate::{fs::walk_dir, utils};
/// Brings the target system up under `/system` from the bootstrap device
/// described by `cfg`.
///
/// Steps, in order:
/// 1. build the signature [`Verifier`] from `cfg`;
/// 2. mount `cfg.bootstrap.dev` on `/bootstrap` as ext4;
/// 3. run `wg-quick up` for every `*.conf` found under `/bootstrap/vpns`;
/// 4. load the system configuration for the selected version (seeding it via
///    `seed_config` when needed) and parse it as YAML;
/// 5. assemble the root filesystem with `mount_system`;
/// 6. inside `/system`: copy `resolv.conf`, apply files, groups, users, extra
///    mounts and the root user, then run `update-ca-certificates`;
/// 7. append serial-console entries to `/system/etc/inittab`.
///
/// The function returns nothing: each fallible step is wrapped in `retry` or
/// `retry_or_ignore` from the parent module.
/// NOTE(review): presumably `retry` loops until the step succeeds and
/// `retry_or_ignore` additionally allows skipping it — confirm in the parent module.
pub async fn bootstrap(cfg: Config) {
let verifier = retry(async || Verifier::from_config(&cfg)).await;
let bs = &cfg.bootstrap;
mount(Some(&bs.dev), "/bootstrap", "ext4", None).await;
// VPNs
// every entry of /bootstrap/vpns whose name ends with ".conf" is handed to wg-quick
for vpn_conf in walk_dir("/bootstrap/vpns").await {
if !vpn_conf.ends_with(".conf") {
continue;
}
retry_or_ignore(async || {
info!("starting VPN from {vpn_conf}");
try_exec("wg-quick", &["up", &vpn_conf]).await
})
.await;
}
// prepare system
// the "version" parameter selects the directory under /bootstrap, defaulting to "current"
let boot_version = utils::param("version").unwrap_or("current");
let base_dir = &format!("/bootstrap/{boot_version}");
retry_or_ignore(async || {
if !fs::try_exists(&base_dir).await? {
info!("creating {base_dir}");
fs::create_dir_all(&base_dir).await?
}
Ok(())
})
.await;
// fetch/seed the configuration bytes and parse them; both steps are retried together
let sys_cfg: dkl::Config = retry(async || {
let sys_cfg_bytes = seed_config(base_dir, &bs, &verifier).await?;
Ok(serde_yaml::from_slice(&sys_cfg_bytes)?)
})
.await;
mount_system(&sys_cfg, &cfg, base_dir, &verifier).await;
// carry the initrd's resolv.conf over into the system root when there is one
retry_or_ignore(async || {
let path = "/etc/resolv.conf";
if fs::try_exists(path).await? {
info!("cp /etc/resolv.conf");
fs::copy(path, &format!("/system{path}")).await?;
}
Ok(())
})
.await;
retry_or_ignore(async || apply::files(&sys_cfg.files, "/system", false).await).await;
apply_groups(&sys_cfg.groups, "/system").await;
apply_users(&sys_cfg.users, "/system").await;
mount_filesystems(&sys_cfg.mounts, "/system").await;
retry_or_ignore(async || {
info!("setting up root user");
setup_root_user(&sys_cfg.root_user, "/system").await
})
.await;
exec("chroot", &["/system", "update-ca-certificates"]).await;
// activate ttyS* consoles as needed
// each `console=ttyS<num>[,<speed>]` option yielded by utils::cmdline() adds one
// agetty respawn line; the inittab is only rewritten when at least one was added
// NOTE(review): utils::cmdline() presumably iterates the kernel command line — confirm
retry_or_ignore(async || {
const PATH: &str = "/system/etc/inittab";
let mut inittab = fs::read_to_string(PATH).await?;
let mut changed = false;
for opt in utils::cmdline().filter_map(|s| s.strip_prefix("console=ttyS")) {
info!("inittab: adding entry for ttyS{opt}");
changed = true;
let mut params = opt.split(',');
// `split` always yields at least one item, so this unwrap cannot panic
let num = params.next().unwrap();
// speed falls back to 115200 when the option carries no second field
let speed = params.next().unwrap_or("115200");
inittab.push_str(&format!(
"S{num}:12345:respawn:/sbin/agetty --noclear {speed} ttyS{num} linux\n"
));
}
if changed {
fs::write(PATH, inittab.as_bytes()).await?;
}
Ok(())
})
.await;
}
/// Optional signature checker for bootstrap artifacts.
///
/// `pubkey` holds the decoded signer public key (DER bytes), or `None` when
/// the configuration defines no signer; in that case files are returned
/// without any verification.
struct Verifier {
    pubkey: Option<Vec<u8>>,
}

impl Verifier {
    /// Builds a verifier from `cfg.signer_public_key`, base64-decoding the key
    /// when one is configured.
    ///
    /// # Errors
    /// Fails when the configured key is not valid base64.
    fn from_config(cfg: &Config) -> Result<Self> {
        let pubkey = match cfg.signer_public_key {
            Some(ref encoded) => Some(base64_decode(encoded)?),
            None => None,
        };
        Ok(Self { pubkey })
    }

    /// Reads `path` and returns its content, after checking it against the
    /// detached signature stored next to it (same path with `.sig` appended)
    /// when a public key is configured.
    ///
    /// The signature is checked with SHA-512 as the message digest.
    ///
    /// # Errors
    /// Fails when the file or its signature cannot be read, when the key
    /// cannot be parsed, or when the signature does not match.
    async fn verify_path(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> {
        // aliased so the openssl type does not shadow this struct's name
        use openssl::{hash::MessageDigest, pkey::PKey, sign::Verifier as SigVerifier};

        let path = path.as_ref();
        let shown = path.display();

        let content = match fs::read(path).await {
            Ok(bytes) => bytes,
            Err(e) => return Err(format_err!("failed to read {shown}: {e}")),
        };

        // no signer configured: hand the data back unchecked
        let der = match self.pubkey {
            Some(ref der) => der,
            None => return Ok(content),
        };

        info!("verifying {shown}");

        let sig_path = path.with_added_extension("sig");
        let signature = match fs::read(&sig_path).await {
            Ok(bytes) => bytes,
            Err(e) => {
                return Err(format_err!(
                    "failed to read {}: {e}",
                    sig_path.display()
                ))
            }
        };

        let key = PKey::public_key_from_der(der)?;
        let mut checker = SigVerifier::new(MessageDigest::sha512(), &key)?;
        let valid = checker
            .verify_oneshot(&signature, &content)
            .map_err(|e| format_err!("verify failed: {e}"))?;

        if !valid {
            return Err(format_err!("signature verification failed for {shown}"));
        }
        Ok(content)
    }
}
/// Returns the bytes of `{base_dir}/config.yaml`, seeding `base_dir` first
/// when the file is not there yet.
///
/// Seeding extracts `/bootstrap.tar` into `base_dir`; when that archive is
/// missing it is first downloaded with `fetch_bootstrap`, which requires
/// `bs.seed` to be set. The archive itself is not checked here: the
/// configuration is verified after extraction.
///
/// The configuration is always read through `verifier`, including when it was
/// already present from an earlier boot, so a modified cached `config.yaml`
/// is rejected whenever a signer key is configured.
///
/// # Errors
/// Fails when nothing can provide a configuration (no file, no archive, no
/// seed URL), when download or extraction fails, when the file is still
/// missing after seeding, or when verification fails.
async fn seed_config(
    base_dir: &str,
    bs: &dkl::bootstrap::Bootstrap,
    verifier: &Verifier,
) -> Result<Vec<u8>> {
    let cfg_path = &format!("{base_dir}/config.yaml");

    if fs::try_exists(cfg_path).await? {
        // verify the cached copy too: it lives on a writable device
        return verifier.verify_path(cfg_path).await;
    }

    let bs_tar = "/bootstrap.tar";
    if !fs::try_exists(bs_tar).await? {
        if bs.seed.is_none() {
            return Err(format_err!(
                "no {cfg_path}, no {bs_tar} and no seed URL, can't bootstrap"
            ));
        }
        fetch_bootstrap(bs, bs_tar).await?;
    }

    try_exec("tar", &["xf", bs_tar, "-C", base_dir]).await?;

    if !fs::try_exists(cfg_path).await? {
        return Err(format_err!("{cfg_path} does not exist after seeding"));
    }

    verifier.verify_path(cfg_path).await
}
async fn fetch_bootstrap(bs: &dkl::bootstrap::Bootstrap, output_file: &str) -> Result<()> {
let seed_url: reqwest::Url = (bs.seed.as_ref())
.ok_or(format_err!("no seed URL"))?
.parse()
.map_err(|e| format_err!("invalid seed URL: {e}"))?;
info!(
"fetching {output_file} from {}",
seed_url.host_str().unwrap_or("<no host>")
);
let mut builder = reqwest::Client::builder();
if let Some(ref proxy) = bs.seed_proxy {
debug!("using proxy {proxy}");
let proxy = reqwest::Proxy::all(proxy) //
.map_err(|e| format_err!("seed proxy setup failed: {e}"))?;
builder = builder.proxy(proxy);
}
if let Some(ref ca) = bs.seed_ca {
debug!("using custom CA certificate");
let ca = base64_decode(ca).map_err(|e| format_err!("invalid seed CA: decode: {e}"))?;
let ca = reqwest::Certificate::from_der(&ca)
.map_err(|e| format_err!("invalid seed CA: parse: {e}"))?;
builder = builder.tls_certs_only([ca]);
}
if let Some(ref sn) = bs.seed_servername {
debug!("tls server name: {sn}");
builder = builder.tls_server_name(bs.seed_servername.clone());
}
let req = builder.build()?.get(seed_url);
let resp = req.send().await?;
if !resp.status().is_success() {
return Err(format_err!("HTTP request failed: {}", resp.status()));
}
let data = (resp.bytes().await).map_err(|e| format_err!("HTTP download failed: {e}"))?;
(fs::write(output_file, &data).await)
.map_err(|e| format_err!("output file write failed: {e}"))?;
Ok(())
}
/// Computes the default tmpfs mount options for the root memory filesystem.
///
/// The size is half of the total system memory, capped at 1024 MiB, and is
/// returned as `size=<n>m`. Returns `None` (after logging a warning) when the
/// memory information cannot be obtained.
fn default_root_tmpfs_opts() -> Option<String> {
    let mem = match sys_info::mem_info() {
        Ok(mem) => mem,
        Err(e) => {
            warn!("failed to get system memory info, using default tmpfs size: {e}");
            return None;
        }
    };

    // `total` is reported in KiB
    let total_mib = mem.total / 1024;
    let half_mib = total_mib / 2;
    let root_mib = if half_mib < 1024 { half_mib } else { 1024 };

    info!("system has {total_mib} MiB of memory, allowing {root_mib} MiB for root tmpfs");
    Some(format!("size={root_mib}m"))
}
/// Mounts system layers one by one and records where they ended up.
///
/// - `bs_dir`: directory holding the layer source files;
/// - `layers_dir`: directory under which each layer gets its image copy and
///   its mount point;
/// - `verifier`: used to read (and check) layer files;
/// - `lower_dir`: colon-separated list of the mount points, in mount order.
struct LayerMounter<'t> {
    bs_dir: &'t str,
    layers_dir: &'t str,
    verifier: &'t Verifier,
    lower_dir: String,
}

impl LayerMounter<'_> {
    /// Source file of layer `name` inside `bs_dir`: `<name>.fs`, except for
    /// the special `merged` layer which carries no extension.
    fn src_path(&self, name: &str) -> PathBuf {
        let mut path = Path::new(self.bs_dir).join(name);
        if name != "merged" {
            path.add_extension("fs");
        }
        path
    }

    /// Tells whether the source file of layer `name` exists.
    async fn exists(&self, name: &str) -> bool {
        retry(async || {
            let present = fs::try_exists(self.src_path(name)).await?;
            Ok(present)
        })
        .await
    }

    /// Mounts layer `name` from its default source file, with verification.
    async fn mount(&mut self, name: &str) {
        let src = self.src_path(name);
        self.mount_path(src, name, true).await
    }

    /// Mounts the layer stored at `src` on `<layers_dir>/<name>` and appends
    /// that mount point to `lower_dir`.
    ///
    /// For a regular layer the file is read (through the verifier when
    /// `verify` is set) and copied to `<layers_dir>/<name>.fs`, which is then
    /// mounted. For the `merged` layer the file is always verified, unpacked
    /// with `MergedLayer`, opened with `veritysetup open` under the name
    /// `system`, and `/dev/mapper/system` is mounted instead.
    ///
    /// The filesystem type is `erofs` when the 4 bytes at offset 1024 match
    /// the little-endian value `0xE0F5E1E2`, `squashfs` otherwise.
    async fn mount_path(&mut self, src: impl AsRef<Path>, name: &str, verify: bool) {
        let src = src.as_ref();
        let mount_point = Path::new(self.layers_dir).join(name);
        let image = mount_point.with_added_extension("fs");

        // a failure here is only logged; the mount below will surface it
        if let Err(e) = fs::create_dir_all(&mount_point).await {
            warn!("mkdir -p {}: {e}", mount_point.display());
        }

        let device = if name != "merged" {
            retry(async || {
                let bytes = if verify {
                    self.verifier.verify_path(src).await?
                } else {
                    fs::read(src).await?
                };
                fs::write(&image, &bytes).await?;
                Ok(image.clone())
            })
            .await
        } else {
            retry(async || {
                let raw = self.verifier.verify_path(src).await?;
                let layer = MergedLayer::from_bytes(&raw)
                    .ok_or(format_err!("{}: invalid data", src.display()))?;

                layer
                    .create(&image)
                    .await
                    .map_err(|e| format_err!("write {}: {e}", image.display()))?;

                // the image is passed twice: once as data device, once as hash device
                let dm_name = "system";
                let mut cmd = tokio::process::Command::new("veritysetup");
                cmd.arg("open")
                    .arg(format!("--hash-offset={}", layer.hash_offset()))
                    .arg(&image)
                    .arg(dm_name)
                    .arg(&image)
                    .arg(layer.root_hash_hex());
                try_exec_cmd(cmd).await?;

                Ok(Path::new("/dev/mapper").join(dm_name))
            })
            .await
        };

        retry(async || {
            let mut header = [0u8; 1028];
            {
                // scoped so the handle is closed before mounting
                let mut file = fs::File::open(&device)
                    .await
                    .map_err(|e| format_err!("open {}: {e}", device.display()))?;
                file.read_exact(&mut header)
                    .await
                    .map_err(|e| format_err!("read {}: {e}", device.display()))?;
            }

            let erofs_magic = 0xE0F5E1E2u32.to_le_bytes();
            let fstype = if header[1024..1028] == erofs_magic {
                "erofs"
            } else {
                "squashfs"
            };

            mount(Some(&device), &mount_point, fstype, None).await;
            Ok(())
        })
        .await;

        if !self.lower_dir.is_empty() {
            self.lower_dir.push(':');
        }
        self.lower_dir.push_str(&mount_point.to_string_lossy());
    }
}
async fn mount_system(cfg: &dkl::Config, bs_cfg: &Config, bs_dir: &str, verifier: &Verifier) {
let opts = match utils::param("root-opts") {
Some(s) => Some(s.to_string()),
None => default_root_tmpfs_opts(),
};
let mem_dir = "/mem";
mount(None::<&str>, mem_dir, "tmpfs", opts.as_deref()).await;
let mut mounter = LayerMounter {
bs_dir,
layers_dir: &format!("{mem_dir}/layers"),
verifier,
lower_dir: String::new(),
};
if mounter.exists("merged").await {
mounter.mount("merged").await;
} else {
for layer in &cfg.layers {
if layer == "modules" && bs_cfg.modules.is_some() {
continue; // take modules from initrd
}
mounter.mount(layer).await;
}
}
if let Some(ref modules) = bs_cfg.modules {
mounter.mount_path(modules, "modules", false).await;
}
let upper_dir = &format!("{mem_dir}/upper");
let work_dir = &format!("{mem_dir}/work");
retry_or_ignore(async || {
fs::create_dir_all(upper_dir).await?;
fs::create_dir_all(work_dir).await?;
Ok(())
})
.await;
let lower_dir = &mounter.lower_dir;
let opts = format!("lowerdir={lower_dir},upperdir={upper_dir},workdir={work_dir}");
mount(None::<&str>, "/system", "overlay", Some(&opts)).await;
// make root rshared (default in systemd, required by Kubernetes 1.10+)
// equivalent to "mount --make-rshared /"
// see kernel's Documentation/sharedsubtree.txt (search rshared)
retry_or_ignore(async || {
use nix::mount::MsFlags as M;
const NONE: Option<&str> = None;
nix::mount::mount(NONE, "/system", NONE, M::MS_SHARED | M::MS_REC, NONE)?;
Ok(())
})
.await;
}
/// Borrowed view over a "merged" layer file.
///
/// The file is a sequence of four sections, each stored as an 8-byte
/// big-endian length followed by that many bytes, in this order:
/// `root_hash_sig`, `root_hash`, `data`, `hash`.
struct MergedLayer<'t> {
    // parsed to keep the section order right, but not read anywhere in this type
    #[allow(unused)]
    root_hash_sig: &'t [u8],
    root_hash: &'t [u8],
    data: &'t [u8],
    hash: &'t [u8],
}

impl<'t> MergedLayer<'t> {
    /// Parses the four length-prefixed sections out of `src`.
    ///
    /// Returns `None` when `src` is too short for a length prefix or for the
    /// announced section, or when an announced length does not fit in
    /// `usize`. Bytes after the fourth section are ignored.
    fn from_bytes(mut src: &'t [u8]) -> Option<Self> {
        let mut next = || {
            let (len, rem) = src.split_at_checked(8)?;
            let len = u64::from_be_bytes(len.try_into().ok()?);
            // checked conversion: a plain `as` cast would silently truncate the
            // length where usize is narrower than 64 bits
            let len = usize::try_from(len).ok()?;
            let (data, rem) = rem.split_at_checked(len)?;
            src = rem;
            Some(data)
        };

        // struct fields are evaluated in the order written, which is the file order
        Some(Self {
            root_hash_sig: next()?,
            root_hash: next()?,
            data: next()?,
            hash: next()?,
        })
    }

    /// Creates `path` and writes the image (see [`Self::write_to`]) into it.
    async fn create(&self, path: impl AsRef<Path>) -> std::io::Result<()> {
        let mut out = fs::File::create(path).await?;
        self.write_to(&mut out).await?;
        out.shutdown().await
    }

    /// Writes `data` immediately followed by `hash` to `out`.
    async fn write_to(&self, mut out: impl AsyncWrite + Unpin) -> std::io::Result<()> {
        out.write_all(self.data).await?;
        out.write_all(self.hash).await?;
        Ok(())
    }

    /// Offset of the hash section in the written image, i.e. the length of `data`.
    fn hash_offset(&self) -> usize {
        self.data.len()
    }

    /// Root hash as a hexadecimal string.
    fn root_hash_hex(&self) -> String {
        hex::encode(self.root_hash)
    }
}
/// Creates every group of `groups` inside `root` by running
/// `chroot <root> groupadd -r [-g <gid>] <name>`.
async fn apply_groups(groups: &[dkl::Group], root: &str) {
    for group in groups {
        // rendered first so the argument list can borrow it
        let gid = group.gid.map(|id| id.to_string());

        let mut args = vec![root, "groupadd", "-r"];
        if let Some(gid) = &gid {
            args.push("-g");
            args.push(gid.as_str());
        }
        args.push(group.name.as_str());

        exec("chroot", &args).await;
    }
}
/// Creates every user of `users` inside `root` by running
/// `chroot <root> useradd -r [-u <uid>] [-g <gid>] <name>`.
async fn apply_users(users: &[dkl::User], root: &str) {
    for user in users {
        // rendered first so the argument list can borrow them
        let uid = user.uid.map(|id| id.to_string());
        let gid = user.gid.map(|id| id.to_string());

        let mut args = vec![root, "useradd", "-r"];
        for (flag, value) in [("-u", &uid), ("-g", &gid)] {
            if let Some(value) = value {
                args.push(flag);
                args.push(value.as_str());
            }
        }
        args.push(user.name.as_str());

        exec("chroot", &args).await;
    }
}
/// Mounts every entry of `mounts` below `root`.
///
/// The filesystem type defaults to `ext4` when unset or empty; empty mount
/// options are treated as absent.
async fn mount_filesystems(mounts: &[dkl::Mount], root: &str) {
    for m in mounts {
        let target = chroot(root, &m.path);

        let fstype = match m.r#type.as_deref() {
            Some(t) if !t.is_empty() => t,
            _ => "ext4",
        };
        let options = m.options.as_deref().filter(|o| !o.is_empty());

        mount(Some(&m.dev), &target, fstype, options).await;
    }
}
/// Configures the root account inside `root`.
///
/// Sets the password hash when `user.password_hash` is present and not empty,
/// then creates `root/.ssh` (mode 0700) and writes `authorized_keys` with one
/// key per line. The file is written even when there are no keys.
///
/// # Errors
/// Fails when the password cannot be set or when the directory or file
/// cannot be created or written.
async fn setup_root_user(user: &dkl::RootUser, root: &str) -> Result<()> {
    match user.password_hash.as_ref() {
        Some(hash) if !hash.is_empty() => set_user_password("root", hash, root).await?,
        _ => {}
    }

    let mut keys_file = Vec::new();
    for key in &user.authorized_keys {
        keys_file.extend_from_slice(key.as_bytes());
        keys_file.push(b'\n');
    }

    let ssh_dir = chroot(root, "root/.ssh");

    if let Err(e) = fs::create_dir_all(&ssh_dir).await {
        return Err(format_err!("mkdir -p {ssh_dir} failed: {e}"));
    }
    if let Err(e) = set_perms(&ssh_dir, Some(0o700)).await {
        return Err(format_err!("chmod {ssh_dir} failed: {e}"));
    }

    let ak_path = format!("{ssh_dir}/authorized_keys");
    if let Err(e) = fs::write(&ak_path, keys_file).await {
        return Err(format_err!("write {ak_path} failed: {e}"));
    }

    Ok(())
}
async fn set_user_password(user: &str, password_hash: &str, root: &str) -> Result<()> {
info!("setting password for {user}");
let user = user.as_bytes();
let password_hash = password_hash.as_bytes();
let mut buf = Vec::new();
let pw_file = &chroot(root, "etc/shadow");
let rd = fs::File::open(pw_file)
.await
.map_err(|e| format_err!("open {pw_file} failed: {e}"))?;
let mut rd = BufReader::new(rd);
let mut line = Vec::new();
while (rd.read_until(b'\n', &mut line).await)
.map_err(|e| format_err!("read {pw_file} failed: {e}"))?
!= 0
{
let mut split: Vec<_> = line.split(|c| *c == b':').collect();
if split.len() > 2 && split[0] == user {
split[1] = password_hash;
buf.extend(split.join(&b':'));
} else {
buf.extend(&line);
}
line.clear();
}
fs::write(pw_file, buf).await?;
Ok(())
}