merged layer handling

This commit is contained in:
Mikaël Cluseau
2026-04-20 22:27:24 +02:00
parent c8bbbf858a
commit e7769155e1
5 changed files with 233 additions and 64 deletions

1
Cargo.lock generated
View File

@@ -991,6 +991,7 @@ dependencies = [
"env_logger",
"eyre",
"glob",
"hex",
"itertools",
"libc",
"log",

View File

@@ -30,3 +30,4 @@ dkl = { git = "https://novit.tech/direktil/dkl", version = "1.0.0" }
openssl = "0.10.73"
reqwest = { version = "0.13.1", features = ["native-tls"] }
glob = "0.3.3"
hex = "0.4.3"

View File

@@ -54,10 +54,10 @@ pub async fn run() {
info!("Linux version {kernel_version}");
// mount basic filesystems
mount(None, "/proc", "proc", None).await;
mount(None, "/sys", "sysfs", None).await;
mount(None, "/dev", "devtmpfs", None).await;
mount(None, "/dev/pts", "devpts", Some("gid=5,mode=620")).await;
mount(None::<&str>, "/proc", "proc", None).await;
mount(None::<&str>, "/sys", "sysfs", None).await;
mount(None::<&str>, "/dev", "devtmpfs", None).await;
mount(None::<&str>, "/dev/pts", "devpts", Some("gid=5,mode=620")).await;
if utils::bool_param("debug") {
log::set_max_level(log::LevelFilter::Debug);
@@ -177,15 +177,27 @@ async fn chmod(path: impl AsRef<Path>, mode: u32) -> std::io::Result<()> {
fs::set_permissions(path, perms).await
}
async fn mount(src: Option<&str>, dst: &str, fstype: &str, opts: Option<&str>) {
async fn mount<S: AsRef<Path>>(
src: Option<S>,
dst: impl AsRef<Path>,
fstype: &str,
opts: Option<&str>,
) {
let src = src.as_ref().map(|s| s.as_ref());
let src_str = src.map(|s| s.display().to_string());
let src_str = src_str.as_deref();
let dst = dst.as_ref();
let dst_str = &dst.display().to_string();
if let Err(e) = fs::create_dir_all(dst).await {
error!("failed to create dir {dst}: {e}");
error!("failed to create dir {dst_str}: {e}");
}
retry_or_ignore(async || {
let mut is_file = false;
if let Some(src) = src {
if let Some(src) = src_str {
is_file = (fs::metadata(src).await)
.map_err(|e| format_err!("stat {src} failed: {e}"))?
.is_file();
@@ -197,7 +209,7 @@ async fn mount(src: Option<&str>, dst: &str, fstype: &str, opts: Option<&str>) {
}
}
let mut args = vec![src.unwrap_or("none"), dst, "-t", fstype];
let mut args = vec![src_str.unwrap_or("none"), dst_str, "-t", fstype];
if let Some(opts) = opts {
args.extend(["-o", opts]);
}
@@ -209,11 +221,17 @@ async fn mount(src: Option<&str>, dst: &str, fstype: &str, opts: Option<&str>) {
}
let (cmd_str, _) = cmd_str("mount", &args);
let flags = nix::mount::MsFlags::empty();
info!("# {cmd_str}",);
nix::mount::mount(src, dst, Some(fstype), flags, opts)
.map_err(|e| format_err!("mount {dst} failed: {e}"))
let mount = |flags| nix::mount::mount(src, dst, Some(fstype), flags, opts);
use nix::{errno::Errno, mount::MsFlags};
match mount(MsFlags::empty()) {
Err(Errno::EACCES) => mount(MsFlags::MS_RDONLY),
r => r,
}
.map_err(|e| format_err!("mount {dst_str} failed: {e}"))
})
.await
}
@@ -228,6 +246,25 @@ async fn start_daemon(prog: &str, args: &[&str]) {
.await;
}
/// Logs the command line of `cmd`, runs it to completion and turns an
/// unsuccessful exit status into an error.
///
/// Arguments are rendered lossily and joined with single spaces for the log
/// line only; the command itself is executed untouched.
async fn try_exec_cmd(mut cmd: tokio::process::Command) -> Result<()> {
    let std_cmd = cmd.as_std();
    let program = std_cmd.get_program().to_string_lossy();
    let rendered_args = std_cmd
        .get_args()
        .map(|arg| arg.to_string_lossy())
        .collect::<Vec<_>>()
        .join(" ");
    info!("# {program} {rendered_args}");

    let status = cmd.status().await?;
    if !status.success() {
        return Err(format_err!("command failed: {status}"));
    }
    Ok(())
}
async fn try_exec(prog: &str, args: &[&str]) -> Result<()> {
let (cmd_str, mut cmd) = cmd_str(prog, args);
info!("# {cmd_str}");

View File

@@ -1,8 +1,9 @@
use eyre::{format_err, Result};
use log::{info, warn};
use std::path::{Path, PathBuf};
use tokio::{
fs,
io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
io::{AsyncBufReadExt, AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader},
};
use dkl::{
@@ -12,7 +13,7 @@ use dkl::{
bootstrap::Config,
};
use super::{exec, mount, retry, retry_or_ignore, try_exec};
use super::{exec, mount, retry, retry_or_ignore, try_exec, try_exec_cmd};
use crate::{fs::walk_dir, utils};
pub async fn bootstrap(cfg: Config) {
@@ -96,17 +97,21 @@ impl Verifier {
return Ok(Self { pubkey });
}
async fn verify_path(&self, path: &str) -> Result<Vec<u8>> {
let data = (fs::read(path).await).map_err(|e| format_err!("failed to read {path}: {e}"))?;
async fn verify_path(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> {
let path = path.as_ref();
let p = path.display();
let data = (fs::read(path).await).map_err(|e| format_err!("failed to read {p}: {e}"))?;
let Some(ref pubkey) = self.pubkey else {
return Ok(data);
};
info!("verifying {path}");
info!("verifying {p}");
let sig = &format!("{path}.sig");
let sig = (fs::read(sig).await).map_err(|e| format_err!("failed to read {sig}: {e}"))?;
let sig = path.with_added_extension("sig");
let sig = (fs::read(&sig).await)
.map_err(|e| format_err!("failed to read {}: {e}", sig.display()))?;
use openssl::{hash::MessageDigest, pkey::PKey, sign::Verifier};
let pubkey = PKey::public_key_from_der(pubkey)?;
@@ -118,7 +123,7 @@ impl Verifier {
if sig_ok {
Ok(data)
} else {
Err(format_err!("signature verification failed for {path}"))
Err(format_err!("signature verification failed for {p}"))
}
}
}
@@ -187,57 +192,136 @@ fn default_root_tmpfs_opts() -> Option<String> {
Some(format!("size={fs_size}m"))
}
async fn mount_system(
cfg: &dkl::Config,
bs_cfg: &Config,
bs_dir: &str,
verifier: &Verifier,
) {
/// State shared by the per-layer mount steps.
///
/// Each successfully processed layer appends its mount directory to
/// `lower_dir`, with `:` between entries.
struct LayerMounter<'t> {
/// Directory the source layer images are looked up in.
bs_dir: &'t str,
/// Directory under which each layer gets a `<name>` mount point and a
/// `<name>.fs` image copy.
layers_dir: &'t str,
/// Reads (and verifies) layer images before they are copied.
verifier: &'t Verifier,
/// Colon-separated list of the layer directories mounted so far.
lower_dir: String,
}
impl LayerMounter<'_> {
    /// Returns the source path of layer `name` inside `bs_dir`.
    ///
    /// Regular layers live at `<bs_dir>/<name>.fs`; the special `merged`
    /// layer is looked up without any added extension.
    fn src_path(&self, name: &str) -> PathBuf {
        let mut p = PathBuf::from(self.bs_dir);
        p.push(name);
        if name != "merged" {
            p.add_extension("fs");
        }
        p
    }

    /// Tells whether the source image of layer `name` exists.
    ///
    /// The check goes through `retry` (defined elsewhere), so an I/O error
    /// while probing is handled by that helper rather than returned here.
    async fn exists(&self, name: &str) -> bool {
        retry(async || Ok(fs::try_exists(self.src_path(name)).await?)).await
    }

    /// Mounts layer `name` from its default source path, with verification.
    async fn mount(&mut self, name: &str) {
        self.mount_path(self.src_path(name), name, true).await
    }

    /// Copies the image at `src` under `layers_dir`, mounts it on
    /// `<layers_dir>/<name>` and appends that directory to `lower_dir`.
    ///
    /// * `src` - image to read.
    /// * `name` - layer name; `"merged"` selects the dm-verity path below.
    /// * `verify` - when true the image is read through the verifier,
    ///   otherwise it is read as-is. The `merged` layer is always verified.
    async fn mount_path(&mut self, src: impl AsRef<Path>, name: &str, verify: bool) {
        // The EROFS magic is looked for at byte 1024 of the image; anything
        // else is mounted as squashfs.
        const EROFS_MAGIC_OFFSET: usize = 1024;
        const EROFS_MAGIC: u32 = 0xE0F5_E1E2;

        let src = src.as_ref();
        let tgt_dir = PathBuf::from(self.layers_dir).join(name);
        let tgt = tgt_dir.with_added_extension("fs");

        // Best effort: a failure here is only logged.
        if let Err(e) = fs::create_dir_all(&tgt_dir).await {
            warn!("mkdir -p {}: {e}", tgt_dir.display());
        }

        let mount_src = if name == "merged" {
            retry(async || {
                let data = self.verifier.verify_path(src).await?;
                // Build the error lazily: only an invalid image pays for it.
                let data = MergedLayer::from_bytes(&data)
                    .ok_or_else(|| format_err!("{}: invalid data", src.display()))?;
                data.create(&tgt)
                    .await
                    .map_err(|e| format_err!("write {}: {e}", tgt.display()))?;

                // The written file holds the data followed by the hash tree,
                // so it is passed both as data device and as hash device,
                // with the hash area starting right after the data.
                let dm_name = "system";
                let mut cmd = tokio::process::Command::new("veritysetup");
                cmd.arg("open")
                    .arg(format!("--hash-offset={}", data.hash_offset()))
                    .arg(&tgt)
                    .arg(dm_name)
                    .arg(&tgt)
                    .arg(data.root_hash_hex());
                try_exec_cmd(cmd).await?;

                Ok(PathBuf::from("/dev/mapper").join(dm_name))
            })
            .await
        } else {
            retry(async || {
                let src = if verify {
                    self.verifier.verify_path(src).await?
                } else {
                    fs::read(src).await?
                };
                fs::write(&tgt, &src).await?;
                Ok(tgt.clone())
            })
            .await
        };

        retry(async || {
            // Read just enough of the image to cover the magic.
            let mut buf = [0u8; EROFS_MAGIC_OFFSET + 4];
            fs::File::open(&mount_src)
                .await
                .map_err(|e| format_err!("open {}: {e}", mount_src.display()))?
                .read_exact(&mut buf)
                .await
                .map_err(|e| format_err!("read {}: {e}", mount_src.display()))?;

            let fstype = if buf[EROFS_MAGIC_OFFSET..EROFS_MAGIC_OFFSET + 4]
                == EROFS_MAGIC.to_le_bytes()
            {
                "erofs"
            } else {
                "squashfs"
            };

            mount(Some(&mount_src), &tgt_dir, fstype, None).await;
            Ok(())
        })
        .await;

        if !self.lower_dir.is_empty() {
            self.lower_dir.push(':');
        }
        self.lower_dir.push_str(&tgt_dir.to_string_lossy());
    }
}
async fn mount_system(cfg: &dkl::Config, bs_cfg: &Config, bs_dir: &str, verifier: &Verifier) {
let opts = match utils::param("root-opts") {
Some(s) => Some(s.to_string()),
None => default_root_tmpfs_opts(),
};
let mem_dir = "/mem";
mount(None, mem_dir, "tmpfs", opts.as_deref()).await;
mount(None::<&str>, mem_dir, "tmpfs", opts.as_deref()).await;
let layers_dir = &format!("{mem_dir}/layers");
let mut lower_dir = String::new();
for layer in &cfg.layers {
let src = retry(async || {
if layer == "modules" && let Some(src) = bs_cfg.modules.as_ref() {
(fs::read(src).await).map_err(|e| format_err!("read {src} failed: {e}"))
} else {
verifier.verify_path(&format!("{bs_dir}/{layer}.fs")).await
}
})
.await;
let fstype = if src.get(1024..1028) == Some(&0xE0F5E1E2u32.to_le_bytes()) {
"erofs"
} else {
"squashfs"
let mut mounter = LayerMounter {
bs_dir,
layers_dir: &format!("{mem_dir}/layers"),
verifier,
lower_dir: String::new(),
};
let tgt = &format!("{mem_dir}/{layer}.fs");
retry(async || {
info!("copying layer {layer}");
let mut out = (fs::File::create(tgt).await)
.map_err(|e| format_err!("create {tgt} failed: {e}"))?;
(out.write_all(&src).await).map_err(|e| format_err!("write failed: {e}"))?;
(out.flush().await).map_err(|e| format_err!("write failed: {e}"))
})
.await;
let layer_dir = &format!("{layers_dir}/{layer}");
mount(Some(tgt), layer_dir, fstype, None).await;
if !lower_dir.is_empty() {
lower_dir.push(':');
if mounter.exists("merged").await {
mounter.mount("merged").await;
} else {
for layer in &cfg.layers {
if layer == "modules" && bs_cfg.modules.is_some() {
continue; // take modules from initrd
}
lower_dir.push_str(&layer_dir);
mounter.mount(layer).await;
}
}
if let Some(ref modules) = bs_cfg.modules {
mounter.mount_path(modules, "modules", false).await;
}
let upper_dir = &format!("{mem_dir}/upper");
@@ -250,8 +334,9 @@ async fn mount_system(
})
.await;
let lower_dir = &mounter.lower_dir;
let opts = format!("lowerdir={lower_dir},upperdir={upper_dir},workdir={work_dir}");
mount(None, "/system", "overlay", Some(&opts)).await;
mount(None::<&str>, "/system", "overlay", Some(&opts)).await;
// make root rshared (default in systemd, required by Kubernetes 1.10+)
// equivalent to "mount --make-rshared /"
@@ -265,6 +350,53 @@ async fn mount_system(
.await;
}
/// Borrowed view of a merged layer image, split into its four sections.
///
/// The sections appear in the serialized image in the same order as the
/// fields below.
struct MergedLayer<'t> {
/// First section of the image. It is parsed to step over it but none of
/// this type's methods read it, hence the lint allowance.
/// NOTE(review): presumably a signature over `root_hash` — confirm.
#[allow(unused)]
root_hash_sig: &'t [u8],
/// Second section; exposed hex-encoded by `root_hash_hex`.
root_hash: &'t [u8],
/// Third section; written first by `write_to`, and its length is what
/// `hash_offset` reports.
data: &'t [u8],
/// Fourth section; written immediately after `data` by `write_to`.
hash: &'t [u8],
}
impl<'t> MergedLayer<'t> {
    /// Parses a merged layer image out of `src` without copying.
    ///
    /// The image is four consecutive sections, each an 8-byte big-endian
    /// length followed by that many bytes, in this order: root hash
    /// signature, root hash, data, hash. Bytes after the fourth section are
    /// ignored.
    ///
    /// Returns `None` when a length prefix or a section is truncated, or
    /// when a length does not fit in `usize`.
    fn from_bytes(mut src: &'t [u8]) -> Option<Self> {
        let mut next = || {
            let (len, rem) = src.split_at_checked(8)?;
            let len = u64::from_be_bytes(len.try_into().ok()?);
            // Checked conversion: an `as` cast would silently truncate the
            // length where `usize` is narrower than 64 bits and mis-split
            // the image instead of rejecting it.
            let len = usize::try_from(len).ok()?;
            let (data, rem) = rem.split_at_checked(len)?;
            src = rem;
            Some(data)
        };

        Some(Self {
            root_hash_sig: next()?,
            root_hash: next()?,
            data: next()?,
            hash: next()?,
        })
    }

    /// Creates (or truncates) the file at `path`, writes the data and hash
    /// sections to it and shuts the writer down.
    async fn create(&self, path: impl AsRef<Path>) -> std::io::Result<()> {
        let mut out = fs::File::create(path).await?;
        self.write_to(&mut out).await?;
        out.shutdown().await
    }

    /// Writes the data section immediately followed by the hash section.
    async fn write_to(&self, mut out: impl AsyncWrite + Unpin) -> std::io::Result<()> {
        out.write_all(self.data).await?;
        out.write_all(self.hash).await?;
        Ok(())
    }

    /// Byte offset of the hash section in the output of `write_to`, i.e.
    /// the length of the data section.
    fn hash_offset(&self) -> usize {
        self.data.len()
    }

    /// Root hash as a hex string.
    fn root_hash_hex(&self) -> String {
        hex::encode(self.root_hash)
    }
}
async fn apply_groups(groups: &[dkl::Group], root: &str) {
for group in groups {
let mut args = vec![root, "groupadd", "-r"];

View File

@@ -21,9 +21,6 @@ auths:
sshKey: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICkpbU6sf4t0f6XAv9DuW3XH5iLM0AI5rc8PT2jwea1N
password: bXlzZWVk:HMSxrg1cYphaPuUYUbtbl/htep/tVYYIQAuvkNMVpw0 # mypass
signer_public_key: 'MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAd5sR4NqLtjSt8ESNlYWvuufYj7v+aYGDlgxQThcKbzDPVe639IfH94hHE0l9TAfyU94qtN/GpFyKJ68F/u2pu70A/umT1m24ELFDqXlQXqhTsH91r+nYUZ7due3EqSrvru/yjchNNRkpoCCu3QkDF25KnrYfWWHqj9ZIRlBTCJE9SwM='
ssh:
listen: "[::]:22"
user_ca: /user_ca.pub
@@ -98,7 +95,8 @@ lvm:
#- dev: /dev/storage/bootstrap
#- dev: /dev/storage/dls
signer_public_key: 'MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQBe6Y3zGQUIHvVXoS5GI8irY8yoB0ozFpzn/cUykA46TkHdJ8xCEaaM1MpqMrfWgDtP/rA2KeE9HjVerLnEFD01uUAUh4/OYgCBDYJPhridVDoC78KOJpkWBj7Shl0Rp0AtETvatNPa1RRe15V7nDF/Nm75Y6O3IL29lYPQ6jqEGhR810='
bootstrap:
dev: /dev/storage/bootstrap
seed: http://192.168.12.254:7606/public/download-set/host/m1/bootstrap.tar?set=IAB4O5F2UYY5YB3SSGEBEPJQCEZPI37WRDWMFXBFS7AHWFHNAYLAPN3AKROCAWPR4X52H62WIPGK4PVU4KQTD2C7SSHTGTJLAGDSDMQDAQRE2GDEOC4RWAAAQA3DSZJXGE4GCND4NA5G2MJ2MJXW65DTORZGC4BOORQXEAAAAAAHRE73ZE
seed: http://192.168.12.254:7606/public/download-set/host/m1/bootstrap.tar?set=ICIXKJJWA6U4RQESD3KQMWO3IBW6THG4FJUM2HUNFPTIODVSXGDPXTCHSFT6IOUZO6LBAG65QIGYUMIZA3TEHTPB6BXKUFONNUWKUWAJAQRE2GDEOC4RWAAAQA3DSZJXMNSDGN34NA5G2MJ2MJXW65DTORZGC4BOORQXEAAAAAACMICVFM