feat(dkl rc)

This commit is contained in:
Mikaël Cluseau
2026-04-16 11:53:37 +02:00
parent 6059d81b3d
commit 15fe8c9ce6
6 changed files with 1048 additions and 214 deletions
+99 -19
View File
@@ -9,6 +9,8 @@ use tokio::fs;
// Top-level command line of the binary.
// (`//` comments on purpose: with clap's derive, `///` would become help text.)
#[derive(Parser)]
#[command()]
struct Cli {
// When set, `main` opens this file in create+append mode and uses it as the
// log target instead of the logger's default one.
#[arg(long)]
log_to: Option<PathBuf>,
#[command(subcommand)]
command: Command,
}
@@ -32,17 +34,17 @@ enum Command {
Logger {
/// Path where the logs are stored
#[arg(long, short = 'p', default_value = "/var/log", env = "DKL_LOG_PATH")]
log_path: String,
log_path: PathBuf,
/// Name of the log instead of the command's basename
#[arg(long, short = 'n')]
log_name: Option<String>,
log_name: Option<PathBuf>,
/// prefix log lines with time & stream
#[arg(long)]
with_prefix: bool,
/// exec command in this cgroup
#[arg(long)]
cgroup: Option<String>,
command: String,
command: PathBuf,
args: Vec<String>,
},
Log {
@@ -90,6 +92,11 @@ enum Command {
#[command(subcommand)]
cmd: CgCmd,
},
Rc {
#[command(subcommand)]
cmd: RcCmd,
},
}
#[derive(Subcommand)]
@@ -104,16 +111,56 @@ enum CgCmd {
},
}
// Subcommands of `rc`. `Run` starts the service manager itself; every other
// variant is forwarded by `main` as a text request through `dkl::rc::ctl`.
// (`//` comments on purpose: with clap's derive, `///` would become help text.)
#[derive(Subcommand)]
enum RcCmd {
// calls `dkl::rc::run()`
Run,
// sends the `ls` request
Ls,
// sends the `status` request
Status,
// sends the `reload-config` request
ReloadConfig,
// sends `start <key>`; key candidates are completed via `dkl::rc::complete`
Start {
#[arg(add = completions(dkl::rc::complete))]
key: String,
},
// sends `stop <key>`
Stop {
#[arg(add = completions(dkl::rc::complete))]
key: String,
},
// sends `reload <key>`
Reload {
#[arg(add = completions(dkl::rc::complete))]
key: String,
},
// sends `sig <key> <signal>`; the signal is formatted with `to_string()`
Sig {
#[arg(add = completions(dkl::rc::complete))]
key: String,
signal: u32,
},
// sends the given arguments as-is
Ctl {
args: Vec<String>,
},
}
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<()> {
clap_complete::CompleteEnv::with_factory(Cli::command).complete();
let cli = Cli::parse();
env_logger::builder()
.parse_filters("info")
.parse_default_env()
.init();
{
let mut builder = env_logger::builder();
builder.parse_filters("info").parse_default_env();
if let Some(log_to) = cli.log_to {
builder.target(env_logger::Target::Pipe(Box::new(
std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(log_to)
.unwrap(),
)));
}
builder.init();
}
use Command as C;
match cli.command {
@@ -127,23 +174,24 @@ async fn main() -> Result<()> {
apply_config(&config, &filters, &prefix, dry_run).await
}
C::Logger {
ref log_path,
ref log_name,
log_path,
log_name,
with_prefix,
cgroup,
command,
args,
} => {
let command = command.as_str();
let log_name = log_name.as_deref().unwrap_or_else(|| basename(command));
let log_name = log_name.unwrap_or_else(|| command.file_prefix().unwrap().into());
dkl::logger::Logger {
let logger = dkl::logger::Logger {
log_path,
log_name,
with_prefix,
}
.run(cgroup, command, &args)
.await
cgroup,
};
let cmd = logger.setup(command, &args).await?;
logger.exec(cmd).await
}
C::Log {
log_path,
@@ -187,6 +235,20 @@ async fn main() -> Result<()> {
cols,
} => Ok(dkl::cgroup::ls(root, &exclude, cols.as_deref()).await?),
},
C::Rc { cmd } => match cmd {
RcCmd::Run => Ok(dkl::rc::run().await?),
RcCmd::Ls => Ok(dkl::rc::ctl(["ls"]).await?),
RcCmd::Status => Ok(dkl::rc::ctl(["status"]).await?),
RcCmd::ReloadConfig => Ok(dkl::rc::ctl(["reload-config"]).await?),
RcCmd::Start { key } => Ok(dkl::rc::ctl(["start", &key]).await?),
RcCmd::Stop { key } => Ok(dkl::rc::ctl(["stop", &key]).await?),
RcCmd::Reload { key } => Ok(dkl::rc::ctl(["reload", &key]).await?),
RcCmd::Sig { key, signal } => {
Ok(dkl::rc::ctl(["sig", &key, &signal.to_string()]).await?)
}
RcCmd::Ctl { args } => Ok(dkl::rc::ctl(&args).await?),
},
}
}
@@ -298,10 +360,6 @@ fn parse_ts_arg(ts: Option<String>) -> Result<Option<dkl::logger::Timestamp>> {
}
}
fn basename(path: &str) -> &str {
path.rsplit_once('/').map_or(path, |split| split.1)
}
fn parse_globs(filters: &[String]) -> Result<Vec<glob::Pattern>> {
let mut errors = false;
let filters = (filters.iter())
@@ -321,3 +379,25 @@ fn parse_globs(filters: &[String]) -> Result<Vec<glob::Pattern>> {
Ok(filters)
}
use clap_complete::{ArgValueCandidates, CompletionCandidate};
/// Adapts an async candidate source to clap's synchronous completion hook.
///
/// Each time candidates are requested, `f` is driven to completion on a
/// dedicated thread with its own current-thread tokio runtime. If that thread
/// panics (including when the runtime cannot be built), no candidate is
/// returned.
// NOTE(review): the separate thread is presumably there because completion is
// triggered from inside `main`'s tokio runtime, where a nested `block_on`
// is not allowed — confirm.
fn completions(f: impl AsyncFn() -> Vec<String> + Send + Sync + 'static) -> ArgValueCandidates {
    let source = std::sync::Arc::new(f);

    ArgValueCandidates::new(move || {
        let f = source.clone();

        let worker = std::thread::spawn(move || {
            let runtime = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .unwrap();
            runtime.block_on(async move { f().await })
        });

        // a panicked worker yields an empty candidate list
        let values: Vec<String> = worker.join().unwrap_or_default();

        values
            .into_iter()
            .map(CompletionCandidate::new)
            .collect::<Vec<_>>()
    })
}
+88 -50
View File
@@ -2,12 +2,13 @@ use async_compression::tokio::write::{ZstdDecoder, ZstdEncoder};
use chrono::{DurationRound, TimeDelta, Utc};
use eyre::{format_err, Result};
use log::{debug, error, warn};
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use tokio::{
fs::File,
io::{self, AsyncBufReadExt, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter},
process,
process::{Child, Command},
sync::mpsc,
time::{sleep, Duration},
};
@@ -22,67 +23,92 @@ const TRUNC_DELTA: TimeDelta = TimeDelta::hours(1);
const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
const WRITE_RETRY_DELAY: Duration = Duration::from_secs(1);
pub struct Logger<'t> {
pub log_path: &'t str,
pub log_name: &'t str,
pub struct Logger {
pub log_path: PathBuf,
pub log_name: PathBuf,
pub with_prefix: bool,
pub cgroup: Option<String>,
}
impl<'t> Logger<'t> {
pub async fn run(&self, cgroup: Option<String>, command: &str, args: &[String]) -> Result<()> {
impl Logger {
pub async fn setup<I, S>(&self, command: impl AsRef<OsStr>, args: I) -> fs::Result<Command>
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
// make sure we can at least open the log before starting the command
let archives_path = &format!("{path}/archives", path = self.log_path);
(fs::create_dir_all(archives_path).await)
.map_err(|e| format_err!("failed to create archives dir: {e}"))?;
let archives_read_dir = (fs::read_dir(archives_path).await)
.map_err(|e| format_err!("failed to list archives: {e}"))?;
let archives_path = &self.log_path.join("archives");
fs::create_dir_all(archives_path).await?;
let mut prev_stamp = trunc_ts(Utc::now());
let mut current_log = BufWriter::new(self.open_log(prev_stamp).await?);
let archives_read_dir = fs::read_dir(archives_path).await?;
let prev_stamp = trunc_ts(Utc::now());
tokio::spawn(compress_archives(
archives_read_dir,
self.log_name.to_string(),
self.log_name.clone(),
prev_stamp.format(TS_FORMAT).to_string(),
));
// start the command
let mut cmd = process::Command::new(command);
cmd.args(args).stdout(Stdio::piped()).stderr(Stdio::piped());
if let Some(cgroup) = cgroup.as_deref() {
let mut cg_path = PathBuf::from(cgroup::ROOT);
cg_path.push(cgroup);
cg_path.push(self.log_name);
// create the command
let mut cmd = Command::new(command);
use std::io::ErrorKind as K;
match tokio::fs::create_dir(&cg_path).await {
Ok(_) => debug!("created dir {}", cg_path.display()),
Err(e) if e.kind() == K::AlreadyExists => {
debug!("existing dir {}", cg_path.display())
}
Err(e) => return Err(fs::Error::CreateDir(cg_path, e).into()),
}
cmd.args(args);
if let Some(cgroup) = self.cgroup.as_deref() {
let cg_path = PathBuf::from(cgroup::ROOT)
.join(cgroup)
.join(&self.log_name);
fs::create_dir_all(&cg_path).await?;
let procs_file = cg_path.join("cgroup.procs");
debug!("procs file {}", procs_file.display());
fs::write(&procs_file, b"0").await?;
unsafe { cmd.pre_exec(move || std::fs::write(&procs_file, b"0")) };
}
let mut child = cmd.spawn().map_err(|e| format_err!("exec failed: {e}"))?;
Ok(cmd)
}
let (tx, mut rx) = mpsc::channel(8);
pub fn spawn(self, mut cmd: Command) -> std::io::Result<Child> {
// setup outputs for capture
cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
// spawn
let mut child = cmd.spawn()?;
// capture outputs
let (tx, rx) = mpsc::channel(8);
tokio::spawn(copy("stdout", child.stdout.take().unwrap(), tx.clone()));
tokio::spawn(copy("stderr", child.stderr.take().unwrap(), tx));
// log outputs
tokio::spawn(self.log_stream(rx));
Ok(child)
}
// TODO: Result<!> when stable
pub async fn exec(self, cmd: Command) -> Result<()> {
let mut child = self.spawn(cmd)?;
// forward signals
if let Some(child_pid) = child.id() {
forward_signals_to(child_pid as i32);
}
// handle output
let status = child.wait().await?;
std::process::exit(status.code().unwrap_or(-1));
}
async fn log_stream(self, mut rx: mpsc::Receiver<LogItem>) {
let mut flush_ticker = tokio::time::interval(FLUSH_INTERVAL);
let mut prev_stamp = trunc_ts(Utc::now());
let mut current_log = BufWriter::new(self.eventually_open_log(prev_stamp).await);
loop {
tokio::select!(
r = rx.recv() => {
@@ -102,15 +128,11 @@ impl<'t> Logger<'t> {
);
}
let status = child.wait().await?;
// finalize
while let Err(e) = current_log.flush().await {
error!("final log flush failed: {e}");
while let Err(e) = current_log.shutdown().await {
error!("final log shutdown failed: {e}");
sleep(WRITE_RETRY_DELAY).await;
}
std::process::exit(status.code().unwrap_or(-1));
}
async fn log_item(
@@ -143,6 +165,18 @@ impl<'t> Logger<'t> {
Ok(())
}
async fn eventually_open_log(&self, ts: Timestamp) -> File {
loop {
match self.open_log(ts).await {
Ok(log) => break log,
Err(e) => {
error!("open log failed: {e}");
sleep(WRITE_RETRY_DELAY).await;
}
}
}
}
async fn open_log(&self, ts: Timestamp) -> Result<File> {
let log_file = &self.archive_path(ts);
@@ -153,8 +187,9 @@ impl<'t> Logger<'t> {
.open(log_file)
.await?;
let link_src = &PathBuf::from(self.log_path)
.join(self.log_name)
let link_src = &self
.log_path
.join(&self.log_name)
.with_added_extension("log");
let link_tgt = &self.archive_rel_path(ts);
@@ -171,17 +206,15 @@ impl<'t> Logger<'t> {
}
fn archive_path(&self, ts: Timestamp) -> PathBuf {
PathBuf::from(self.log_path).join(self.archive_rel_path(ts))
self.log_path.join(self.archive_rel_path(ts))
}
fn archive_rel_path(&self, ts: Timestamp) -> PathBuf {
PathBuf::from("archives").join(self.archive_file(ts))
}
fn archive_file(&self, ts: Timestamp) -> String {
format!(
"{name}.{ts}.log",
name = self.log_name,
ts = ts.format(TS_FORMAT),
)
fn archive_file(&self, ts: Timestamp) -> PathBuf {
self.log_name
.with_added_extension(ts.format(TS_FORMAT).to_string())
.with_added_extension("log")
}
}
@@ -266,7 +299,7 @@ async fn copy(stream_name: &'static str, out: impl AsyncRead + Unpin, tx: mpsc::
line.extend_from_slice(buf);
out.consume(len);
send_line!();
} else if buf.len() > remaining {
} else if buf.len() >= remaining {
line.extend_from_slice(&buf[..remaining]);
out.consume(remaining);
send_line!();
@@ -287,7 +320,12 @@ pub fn trunc_ts(ts: Timestamp) -> Timestamp {
.expect("duration_trunc failed")
}
async fn compress_archives(mut read_dir: fs::ReadDir, log_name: String, exclude_ts: String) {
async fn compress_archives(
mut read_dir: fs::ReadDir,
log_name: impl AsRef<Path>,
exclude_ts: String,
) {
let log_name = log_name.as_ref();
loop {
let Ok(Some(entry)) =
(read_dir.next_entry().await).inspect_err(|e| error!("archive dir read failed: {e}"))
+494
View File
@@ -1 +1,495 @@
use eyre::format_err;
use log::{error, info, warn};
use nix::sys::signal::Signal;
use std::collections::{BTreeMap as Map, BTreeSet as Set};
use std::path::PathBuf;
use std::sync::LazyLock;
use tokio::{
io::{copy, AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader},
net::{UnixListener, UnixStream},
sync::{mpsc, watch, RwLock},
};
use crate::{cgroup, fs};
mod runner;
use runner::{Child, State};
/// YAML configuration file read by `reload_config`.
const CFG_PATH: &str = "/etc/direktil/rc.yaml";
/// Unix socket on which `run` listens and to which `ctl_exec` connects.
const SOCK_PATH: &str = "/run/dkl-rc/ctl.sock"; // Path::new when stable
/// Root of the rc configuration, as deserialized from `CFG_PATH`.
#[derive(Default, serde::Serialize, serde::Deserialize)]
pub struct Config {
/// Cgroups by name; optional in the input, omitted from the output when empty.
#[serde(default, skip_serializing_if = "Map::is_empty")]
pub cgroups: Map<String, CgroupConfig>,
}
/// Configuration of one cgroup and of the services that run under it.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct CgroupConfig {
/// Written verbatim to the cgroup's `cgroup.subtree_control` file when the
/// configuration is applied.
pub controllers: String,
/// File name (relative to the cgroup directory) to the value written into it
/// when the configuration is applied.
#[serde(default, skip_serializing_if = "Map::is_empty")]
pub settings: Map<String, String>,
/// Services by name; the full service key is `<cgroup>/<service>`.
#[serde(default, skip_serializing_if = "Map::is_empty")]
pub services: Map<String, Service>,
}
/// Command line of a service: the first element is the program, the remaining
/// ones are its arguments (this is how the runner consumes it).
pub type Service = Vec<String>;
/// Process-wide service manager, shared by the control socket handlers and the
/// signal tasks.
static MANAGER: LazyLock<RwLock<Manager>> = LazyLock::new(|| RwLock::new(Manager::default()));
/// Result type of this module, using the module's own [`Error`].
type Result<T> = std::result::Result<T, Error>;
/// Errors of the rc module. Their `Display` text is what a control client
/// receives after the `1` status line.
#[derive(Debug, thiserror::Error)]
enum Error {
/// Unknown request verb on the control socket.
#[error("invalid command: {0:?}")]
InvalidCommand(String),
/// `CFG_PATH` could not be read.
#[error("config read failed: {0}")]
ConfigRead(fs::Error),
/// `CFG_PATH` is not valid YAML for [`Config`].
#[error("config parse failed: {0}")]
ConfigParse(serde_yaml::Error),
/// Creating or configuring a cgroup directory failed.
#[error("cgroup setup failed: {0}")]
CgroupSetup(fs::Error),
/// The key has no `/` separator.
#[error("invalid key (cgroup/service)")]
InvalidKey,
#[error("unknown cgroup: {0:?}")]
UnknownCgroup(String),
#[error("unknown service: {0:?}")]
UnknownService(String),
/// The signal name could not be parsed.
#[error("invalid signal: {0:?}")]
InvalidSignal(String),
/// The child has no PID to signal.
#[error("process exited")]
ProcessExited,
/// No child is registered under the given key.
#[error("nothing running under {0:?}")]
NotRunning(String),
#[error("kill failed: {0:?}")]
KillFailed(nix::Error),
/// The runner task no longer receives commands.
#[error("service runner is dead")]
RunnerDead,
}
/// Runs the service manager: applies the configuration, then serves control
/// requests on the unix socket at `SOCK_PATH` until the process terminates.
///
/// The SIGTERM task (`wait_terminate`) is responsible for stopping the
/// services, removing the socket and exiting; this function itself only
/// returns on a startup error.
///
/// # Errors
/// Returns an error when the control socket cannot be bound.
pub async fn run() -> eyre::Result<()> {
    // Pause between two accept attempts after a failure, so a persistent
    // error (e.g. file descriptor exhaustion) does not turn into a busy loop.
    const ACCEPT_RETRY_DELAY: std::time::Duration = std::time::Duration::from_millis(100);

    info!("starting");

    tokio::spawn(wait_terminate());

    // a bad initial config is not fatal: it is logged by reload_config and can
    // be fixed then reloaded later
    let _ = reload_config().await;
    tokio::spawn(wait_reload());

    if let Some(sock_dir) = PathBuf::from(SOCK_PATH).parent() {
        // best effort: the directory usually exists already
        let _ = tokio::fs::DirBuilder::new()
            .mode(0o700)
            .create(sock_dir)
            .await;
    }

    // remove a socket left over by a previous instance, otherwise bind fails
    let _ = tokio::fs::remove_file(SOCK_PATH).await;
    let listener = UnixListener::bind(SOCK_PATH)?;

    loop {
        match listener.accept().await {
            Ok((conn, _)) => {
                tokio::spawn(handle(conn));
            }
            Err(e) => {
                // accept errors are typically transient; giving up here would
                // exit the manager while its services are still running
                warn!("accept failed (will retry): {e}");
                tokio::time::sleep(ACCEPT_RETRY_DELAY).await;
            }
        }
    }
}
/// Removes the control socket; a failure (e.g. the file is already gone) is ignored.
async fn cleanup() {
    tokio::fs::remove_file(SOCK_PATH).await.ok();
}
/// Sends a control request to the running manager and exits the process.
///
/// The arguments are joined with single spaces into one request line. On
/// success the response body is copied to stdout and the process exits with
/// status 0; on failure the error is printed on stderr and the process exits
/// with status 1.
///
/// # Errors
/// Only returned when copying the response to stdout fails; every other
/// outcome ends in `std::process::exit`.
pub async fn ctl<I, S>(args: I) -> eyre::Result<()>
where
    I: IntoIterator<Item = S>,
    S: Into<String>,
{
    let args: Vec<String> = args.into_iter().map(Into::into).collect();
    let request = format!("{}\n", args.join(" "));

    match ctl_exec(request.as_bytes()).await {
        Ok(mut rd) => {
            copy(&mut rd, &mut tokio::io::stdout()).await?;
            std::process::exit(0);
        }
        Err(e) => {
            // ctl_exec strips the trailing newline of the server's message, so
            // terminate the line here
            eprintln!("{e}");
            std::process::exit(1);
        }
    }
}
/// Sends `request` over the control socket and reads the status line of the
/// response.
///
/// Returns the reader positioned right after the status line when the status
/// is `0`. For any other status, the rest of the response is read and returned
/// as the error message (trailing ASCII whitespace removed).
///
/// # Errors
/// Connection or I/O failures, a status line that is not an integer, or a
/// non-zero status from the server.
async fn ctl_exec(request: &[u8]) -> eyre::Result<BufReader<UnixStream>> {
    let mut stream = match UnixStream::connect(SOCK_PATH).await {
        Ok(stream) => stream,
        Err(e) => return Err(format_err!("{SOCK_PATH}: {e}")),
    };

    stream.write_all(request).await?;

    let mut reader = BufReader::with_capacity(64, stream);

    let mut status_line = String::new();
    reader.read_line(&mut status_line).await?;
    let status: i32 = status_line.trim_ascii().parse()?;

    if status == 0 {
        return Ok(reader);
    }

    let mut message = String::new();
    reader.read_to_string(&mut message).await?;
    Err(format_err!("{}", message.trim_ascii_end()))
}
async fn handle(mut conn: UnixStream) {
let (rd, mut wr) = conn.split();
let mut rd = BufReader::with_capacity(64, rd).lines();
let Ok(Some(line)) = rd.next_line().await else {
return;
};
let mut line = line.split_ascii_whitespace();
macro_rules! next {
() => {{
match line.next() {
Some(v) => v,
None => return,
}
}};
}
let r = match next!() {
"ls" => Ok(Some(ls().await)),
"status" => Ok(Some(status().await)),
"reload-config" => reload_config().await.map(|_| None),
"start" => start(next!()).await.map(|_| None),
"stop" => stop(next!()).await.map(|_| None),
"reload" => reload(next!()).await.map(|_| None),
"sig" => sig(next!(), next!()).await.map(|_| None),
cmd => Err(Error::InvalidCommand(cmd.into())),
};
let _ = match r {
Ok(None) => wr.write_all(b"0\n").await,
Ok(Some(s)) => wr.write_all(format!("0\n{s}\n").as_bytes()).await,
Err(e) => wr.write_all(format!("1\n{e}\n").as_bytes()).await,
};
let _ = wr.shutdown().await;
}
async fn wait_terminate() {
use tokio::signal::unix::{signal, SignalKind};
let Ok(mut sig) = signal(SignalKind::terminate())
.inspect_err(|e| error!("failed to listen to SIGTERM (will be ignored): {e}"))
else {
return;
};
sig.recv().await;
info!("SIGTERM received, terminating");
MANAGER.write().await.terminate().await;
cleanup().await;
log::logger().flush();
std::process::exit(0);
}
async fn wait_reload() {
use tokio::signal::unix::{signal, SignalKind};
let Ok(mut sig) = signal(SignalKind::hangup())
.inspect_err(|e| error!("failed to listen to SIGHUP (will be ignored): {e}"))
else {
return;
};
loop {
sig.recv().await;
let _ = reload_config().await;
}
}
async fn reload_config() -> Result<()> {
let cfg = (fs::read(CFG_PATH).await)
.map_err(Error::ConfigRead)
.inspect_err(|e| error!("{e}"))?;
let cfg = serde_yaml::from_slice::<Config>(&cfg)
.map_err(Error::ConfigParse)
.inspect_err(|e| error!("{CFG_PATH}: {e}"))?;
info!("applying new config");
let r = MANAGER.write().await.apply_config(cfg).await;
match &r {
Ok(_) => info!("applied new config"),
Err(e) => info!("failed to apply new config: {e}"),
}
r
}
/// Lists the keys that currently have a runner, one per line, without a
/// trailing newline.
async fn ls() -> String {
    let manager = MANAGER.read().await;
    let keys: Vec<&str> = manager.runners.keys().map(String::as_str).collect();
    keys.join("\n")
}
/// Renders the state of every known child as a psql-style text table with the
/// columns cgroup, service, PID, state and msg.
async fn status() -> String {
    let children = MANAGER.read().await.status();

    let mut builder = tabled::builder::Builder::new();
    builder.push_record(["cgroup", "service", "PID", "state", "msg"]);

    for (key, child) in children {
        let (cgroup, service) = key.split_once('/').unwrap();
        let pid = child.pid.map_or_else(String::new, |p| p.to_string());
        let state = format!("{:?}", child.state);
        let msg = child.msg.as_deref().unwrap_or("");

        builder.push_record([cgroup, service, &pid, &state, msg]);
    }

    let mut table = builder.build();
    table.with(tabled::settings::Style::psql());
    table.to_string()
}
/// Asks the runner of `key` to start its service (exclusive manager access).
async fn start(key: &str) -> Result<()> {
    let mut manager = MANAGER.write().await;
    manager.start(key).await
}
/// Asks the runner of `key` to stop its service (exclusive manager access).
async fn stop(key: &str) -> Result<()> {
    let mut manager = MANAGER.write().await;
    manager.stop(key).await
}
/// Sends the reload signal to the child registered under `key` (shared
/// manager access).
async fn reload(key: &str) -> Result<()> {
    let manager = MANAGER.read().await;
    manager.reload(key).await
}
async fn sig(key: &str, sig: &str) -> Result<()> {
let sig: Signal = sig.parse().map_err(|_| Error::InvalidSignal(sig.into()))?;
signal(key, sig).await
}
/// Returns a snapshot of the child registered under `key`.
async fn child_for(key: &str) -> Result<Child> {
    let manager = MANAGER.read().await;
    manager.child_for(key)
}
/// Sends `sig` to the child registered under `key`.
async fn signal(key: &str, sig: Signal) -> Result<()> {
    let child = child_for(key).await?;
    child.kill(sig)
}
/// Builds the `<cgroup>/<service>` key that identifies a service.
fn child_key(cg: &str, svc: &str) -> String {
    format!("{cg}/{svc}")
}
fn split_key(key: &str) -> Result<(&str, &str)> {
key.split_once('/').ok_or(Error::InvalidKey)
}
/// State of the service manager. Both maps are keyed by `<cgroup>/<service>`.
#[derive(Default)]
struct Manager {
/// Last successfully applied configuration.
cfg: Config,
/// Latest published state of each child, as sent by its runner.
procs: Map<String, watch::Receiver<Child>>,
/// Command channel of each runner task.
runners: Map<String, mpsc::Sender<runner::Cmd>>,
}
impl Manager {
    /// Returns a snapshot of every known child, with its key.
    fn status(&self) -> Vec<(String, Child)> {
        (self.procs.iter())
            .map(|(n, c)| (n.clone(), c.borrow().clone()))
            .collect()
    }

    /// Returns a snapshot of the child registered under `key`.
    ///
    /// # Errors
    /// `Error::NotRunning` when no child is registered under `key`.
    fn child_for(&self, key: &str) -> Result<Child> {
        (self.procs.get(key))
            .map(|c| c.borrow().clone())
            .ok_or_else(|| Error::NotRunning(key.into()))
    }

    /// Applies a new configuration: sets up the cgroups, stops the services
    /// that are no longer configured, starts the newly configured ones, then
    /// waits for the stopped ones to be finalized.
    ///
    /// Services that already have a runner are left untouched.
    // NOTE(review): that includes services whose command line changed; their
    // runner keeps the command line it was created with.
    ///
    /// # Errors
    /// `Error::CgroupSetup` when a cgroup cannot be created or configured; in
    /// that case no service is stopped or started and the previous
    /// configuration is kept.
    async fn apply_config(&mut self, new_cfg: Config) -> Result<()> {
        // create and configure cgroups
        for (name, cg) in &new_cfg.cgroups {
            let cg_path = PathBuf::from(cgroup::ROOT).join(name);

            fs::create_dir_all(&cg_path)
                .await
                .map_err(Error::CgroupSetup)?;

            fs::write(
                cg_path.join("cgroup.subtree_control"),
                cg.controllers.as_bytes(),
            )
            .await
            .map_err(Error::CgroupSetup)?;

            for (setting, value) in &cg.settings {
                fs::write(cg_path.join(setting), value.as_bytes())
                    .await
                    .map_err(Error::CgroupSetup)?;
            }
        }

        let new_svcs: Set<_> = new_cfg.service_keys().collect();

        // stop removed services
        let to_stop = Map::from_iter(self.runners.extract_if(.., |k, _| !new_svcs.contains(k)));
        let mut stopped = Set::new();

        for (key, runner_cmd) in to_stop {
            // A send failure means the runner is already dead. Its entry in
            // `procs` must be cleaned up all the same, or the removed service
            // would show up in `status` forever.
            let _ = runner_cmd.send(runner::Cmd::Stop).await;
            stopped.insert(key);
        }

        // start added services
        for (key, cg, svc, service) in new_cfg.services() {
            if self.runners.contains_key(&key) {
                continue;
            };

            let cmd = self.spawn_runner(key, cg, svc, service.clone());
            if let Err(e) = cmd.send(runner::Cmd::Start).await {
                error!("runner instantly died: {e}");
            }
        }

        // wait & cleanup stopped
        for key in stopped {
            let Some(mut child_rx) = self.procs.remove(&key) else {
                continue;
            };
            // returns an error right away if the runner is gone without
            // publishing its final state, which is fine here
            let _ = child_rx
                .wait_for(|c| matches!(c.state, State::Finalized))
                .await;
        }

        self.cfg = new_cfg;
        Ok(())
    }

    /// Stops every service and waits until each runner has published its
    /// final state.
    async fn terminate(&mut self) {
        // dropping the command senders makes every runner stop its process
        // and finish
        self.runners.clear();

        for child in self.procs.values_mut() {
            let _ = child
                .wait_for(|c| matches!(c.state, State::Finalized))
                .await;
        }

        self.procs.clear();
    }

    /// Returns the command channel of the runner for `key`, spawning a runner
    /// from the current configuration when there is none yet.
    ///
    /// # Errors
    /// `Error::InvalidKey`, `Error::UnknownCgroup` or `Error::UnknownService`
    /// when the key does not designate a configured service.
    fn runner(&mut self, key: &str) -> Result<mpsc::Sender<runner::Cmd>> {
        if let Some(c) = self.runners.get(key) {
            return Ok(c.clone());
        }

        let (cg, svc) = split_key(key)?;
        let service = self.cfg.service(key)?;
        Ok(self.spawn_runner(key.into(), cg, svc, service.clone()))
    }

    /// Spawns the runner task of a service and registers both its state
    /// receiver and its command channel under `key`.
    fn spawn_runner(
        &mut self,
        key: String,
        cg: &str,
        svc: &str,
        service: Service,
    ) -> mpsc::Sender<runner::Cmd> {
        let (runner, child_rx, cmds_tx) = runner::new(cg, svc, service);
        tokio::spawn(runner.run());

        self.procs.insert(key.clone(), child_rx);
        self.runners.insert(key, cmds_tx.clone());
        cmds_tx
    }

    /// Sends `cmd` to the runner of `key`, creating the runner if needed.
    ///
    /// # Errors
    /// The errors of `runner`, or `Error::RunnerDead` when the runner no
    /// longer accepts commands (it is then forgotten, so the next command
    /// spawns a fresh one).
    async fn cmd(&mut self, key: &str, cmd: runner::Cmd) -> Result<()> {
        if self.runner(key)?.send(cmd).await.is_err() {
            // runner died
            self.runners.remove(key);
            return Err(Error::RunnerDead);
        }
        Ok(())
    }

    /// Asks the runner of `key` to start its service.
    async fn start(&mut self, key: &str) -> Result<()> {
        self.cmd(key, runner::Cmd::Start).await
    }

    /// Asks the runner of `key` to stop its service.
    async fn stop(&mut self, key: &str) -> Result<()> {
        self.cmd(key, runner::Cmd::Stop).await
    }

    /// Sends the reload signal to the child registered under `key`.
    ///
    /// # Errors
    /// `Error::UnknownService` when no child is registered under `key`,
    /// otherwise whatever `Child::reload` returns.
    async fn reload(&self, key: &str) -> Result<()> {
        let proc = (self.procs.get(key)) //
            .ok_or_else(|| Error::UnknownService(key.into()))?;
        proc.borrow().reload()
    }
}
impl Config {
fn cgroup(&self, cg: &str) -> Result<&CgroupConfig> {
self.cgroups
.get(cg)
.ok_or_else(|| Error::UnknownCgroup(cg.into()))
}
fn service(&self, key: &str) -> Result<&Service> {
let (cg, svc) = split_key(key)?;
self.cgroup(cg)?.service(svc)
}
fn service_keys(&self) -> impl Iterator<Item = String> {
(self.cgroups.iter())
.map(|(cg_name, cg)| cg.services.keys().map(move |n| child_key(cg_name, n)))
.flatten()
}
fn services(&self) -> impl Iterator<Item = (String, &String, &String, &Service)> {
(self.cgroups.iter())
.map(|(cg_name, cg)| {
cg.services
.iter()
.map(move |(n, service)| (child_key(cg_name, n), cg_name, n, service))
})
.flatten()
}
}
impl CgroupConfig {
fn service(&self, svc: &str) -> Result<&Vec<String>> {
self.services
.get(svc)
.ok_or_else(|| Error::UnknownService(svc.into()))
}
}
/// Returns the keys reported by the manager's `ls` request, for shell
/// completion.
///
/// Never fails: if the manager cannot be reached the list is empty, and a
/// read error simply ends the list early.
pub async fn complete() -> Vec<String> {
    let Ok(reader) = ctl_exec(b"ls\n").await else {
        return Vec::new();
    };

    let mut keys = Vec::new();
    let mut lines = reader.lines();
    loop {
        match lines.next_line().await {
            Ok(Some(key)) => keys.push(key),
            Ok(None) | Err(_) => break,
        }
    }
    keys
}
+263
View File
@@ -0,0 +1,263 @@
use log::{error, warn};
use nix::{
sys::signal::{kill, Signal},
unistd::Pid,
};
use std::num::NonZero;
use tokio::{
process, select,
sync::{mpsc, watch},
time::{sleep, sleep_until, Duration, Instant},
};
use super::{Error, Result, Service};
use crate::logger::Logger;
/// Log directory handed to the `Logger` of every service.
const LOG_PATH: &str = "/var/log";
/// Time given to a process to exit after SIGTERM before it is killed.
const TERM_DELAY: Duration = Duration::from_secs(30);
/// Time waited for a process to exit after the kill before giving up.
const KILL_DELAY: Duration = Duration::from_secs(10);
/// Delay between the exit of a process and its automatic restart.
const RESTART_DELAY: Duration = Duration::from_secs(8);
/// Commands accepted by a [`Runner`] on its command channel.
#[derive(Debug, Clone, Copy)]
pub enum Cmd {
/// Start the service process, unless it is already running.
Start,
/// Stop the service process and cancel any pending restart.
Stop,
}
/// Lifecycle state of a service, as published by its runner.
#[derive(Default, Clone, Copy, Debug)]
pub enum State {
/// Initial state, published when the runner starts.
#[default]
NeverStarted,
/// A start was requested and is being set up.
Starting,
/// The process was spawned.
Running,
/// The process exited on its own, or could not be set up or spawned.
Crashed,
/// SIGTERM was sent; waiting for the process to exit.
Stopping,
/// The process exited after a stop request.
Stopped,
/// The runner itself is done; final state.
Finalized,
}
/// Creates the runner of service `svc` in cgroup `cg`.
///
/// Returns the runner (to be driven with [`Runner::run`]), the receiver on
/// which it publishes the child's state, and the sender used to command it.
/// The command channel has a capacity of one.
pub fn new(
    cg: impl Into<String>,
    svc: impl Into<String>,
    service: Service,
) -> (Runner, watch::Receiver<Child>, mpsc::Sender<Cmd>) {
    let (cmds_tx, cmds_rx) = mpsc::channel(1);
    let (manager, child_rx) = ProcessManager::new(service);

    (
        Runner {
            cg: cg.into(),
            svc: svc.into(),
            cmds_rx,
            manager,
        },
        child_rx,
        cmds_tx,
    )
}
/// Task state supervising a single service: it executes the received commands
/// and lets its `ProcessManager` watch the process in between.
pub struct Runner {
/// Name of the cgroup the service runs in.
cg: String,
/// Name of the service; also used as its log name.
svc: String,
/// Commands from the manager; the runner finishes when all senders are dropped.
cmds_rx: mpsc::Receiver<Cmd>,
manager: ProcessManager,
}
impl Runner {
    /// Supervises the service until the command side is dropped.
    ///
    /// Each iteration waits for whichever comes first: a management event
    /// (process exit or restart deadline) or a command. When the command
    /// channel is closed, the process is stopped and the `Finalized` state is
    /// published.
    pub async fn run(mut self) {
        self.manager.update(State::NeverStarted);

        loop {
            let pending = select! {
                from_manager = self.manager.manage() => from_manager,
                received = self.cmds_rx.recv() => match received {
                    Some(cmd) => Some(cmd),
                    None => break, // command side dropped
                },
            };

            match pending {
                Some(cmd) => self.process_cmd(cmd).await,
                None => {}
            }
        }

        self.process_cmd(Cmd::Stop).await;
        self.manager.update(State::Finalized);
    }

    /// Executes one command against the process manager.
    async fn process_cmd(&mut self, cmd: Cmd) {
        match cmd {
            Cmd::Start => self.manager.start(&self.cg, &self.svc).await,
            Cmd::Stop => self.manager.stop().await,
        }
    }
}
/// Owns the process of one service and publishes its state.
struct ProcessManager {
/// Command line: program first, then its arguments.
service: Service,
/// Publishes the current [`Child`] snapshot.
child_tx: watch::Sender<Child>,
/// The running process, if any.
process: Option<process::Child>,
/// When set, the instant at which the service is started again.
restart_deadline: Option<Instant>,
}
impl ProcessManager {
    /// Creates a manager with no process, publishing the default [`Child`].
    fn new(service: Service) -> (Self, watch::Receiver<Child>) {
        let (child_tx, child_rx) = watch::channel(Child::default());
        let pm = Self {
            service,
            child_tx,
            process: None,
            restart_deadline: None,
        };
        (pm, child_rx)
    }

    /// runs a management iteration (ie: waiting for the child or a restart deadline).
    ///
    /// - with a process: waits for its exit, publishes `Crashed` with the
    ///   exit status (or the wait error) as message and schedules a restart;
    /// - with a restart deadline: sleeps until it, then returns `Cmd::Start`;
    /// - otherwise: never completes.
    async fn manage(&mut self) -> Option<Cmd> {
        if let Some(process) = self.process.as_mut() {
            let msg = match process.wait().await {
                Ok(status) => status.to_string(),
                Err(e) => e.to_string(),
            };
            self.crashed(msg);
            self.process = None;
            self.restart_deadline = Some(Instant::now() + RESTART_DELAY);
            None
        } else if let Some(deadline) = self.restart_deadline {
            sleep_until(deadline).await;
            Some(Cmd::Start)
        } else {
            std::future::pending().await
        }
    }

    /// Starts the service process in cgroup `cg`, logging under the name
    /// `svc`. Does nothing when a process is already running.
    ///
    /// A failed setup or spawn is published as `Crashed` and retried after
    /// `RESTART_DELAY`, like a process that exits on its own. An empty command
    /// line is published as `Crashed` too, but is not retried since it cannot
    /// succeed.
    async fn start(&mut self, cg: &str, svc: &str) {
        if self.process.is_some() {
            return;
        }

        self.update(State::Starting);

        let mut args = self.service.iter();
        let Some(program) = args.next() else {
            error!("{cg}/{svc}: empty command");
            // do not stay in the Starting state, and do not retry
            self.restart_deadline = None;
            self.crashed("empty command".into());
            return;
        };

        let logger = Logger {
            log_path: LOG_PATH.into(),
            log_name: svc.into(),
            with_prefix: false,
            cgroup: Some(cg.into()),
        };

        let setup = logger.setup(program, args).await;
        let spawned = match setup {
            Ok(cmd) => logger.spawn(cmd).map_err(|e| format!("exec failed: {e}")),
            Err(e) => Err(format!("setup failed: {e}")),
        };

        match spawned {
            Ok(child) => {
                self.process = Some(child);
                self.restart_deadline = None;
                self.update(State::Running);
            }
            Err(msg) => {
                // Always push the deadline into the future: when this start
                // came from an expired restart deadline, leaving it as is
                // would make `manage` fire again immediately, in a loop.
                self.restart_deadline = Some(Instant::now() + RESTART_DELAY);
                self.crashed(msg);
            }
        }
    }

    /// Stops the process, if any, and cancels any pending restart.
    ///
    /// Sends SIGTERM and waits up to `TERM_DELAY`; then sends SIGKILL and
    /// waits up to `KILL_DELAY`. `Stopped` is published once the process has
    /// exited; if it survives the kill, an error is logged and the state is
    /// left at `Stopping`.
    async fn stop(&mut self) {
        self.restart_deadline = None;

        let Some(mut process) = self.process.take() else {
            return;
        };

        let Some(pid) = process.id() else {
            let _ = process.wait().await; // already dead, reap it
            self.update(State::Stopped);
            return;
        };

        // nix expects a signed pid
        let pid = pid as i32;

        self.update_full(pid, State::Stopping, None);

        let pid = Pid::from_raw(pid);
        let _ = kill(pid, Signal::SIGTERM).inspect_err(|e| error!("kill -TERM {pid} failed: {e}"));

        select! {
            _ = process.wait() => {
                self.update(State::Stopped);
                return;
            },
            _ = sleep(TERM_DELAY) => {
                warn!("process {pid} did not exit during the grace period, killing");
                // Only send the signal here: `kill().await` would also wait
                // for the exit, without any timeout, defeating KILL_DELAY.
                let _ = process.start_kill().inspect_err(|e| error!("kill -KILL {pid} failed: {e}"));
            }
        }

        select! {
            _ = process.wait() => self.update(State::Stopped),
            _ = sleep(KILL_DELAY) => error!("process {pid} still alive after SIGKILL"),
        }
    }

    /// PID of the current process, or 0 when there is none (or it has no PID
    /// anymore).
    fn process_pid(&self) -> i32 {
        (self.process.as_ref())
            .and_then(|c| Some(c.id()? as i32))
            .unwrap_or(0)
    }

    /// Publishes `state` with the current PID and no message.
    fn update(&self, state: State) {
        let pid = self.process_pid();
        self.update_full(pid, state, None);
    }

    /// Publishes a full [`Child`] snapshot; a `pid` of 0 is published as "no PID".
    fn update_full(&self, pid: i32, state: State, msg: Option<String>) {
        self.child_tx.send_replace(Child {
            pid: NonZero::new(pid),
            state,
            msg,
        });
    }

    /// Publishes the `Crashed` state with `msg`.
    fn crashed(&self, msg: String) {
        let pid = self.process_pid();
        self.update_full(pid, State::Crashed, Some(msg));
    }
}
/// Snapshot of a service's process, as published by its runner.
#[derive(Clone, Default)]
pub struct Child {
/// PID of the process, when it has one.
pub pid: Option<NonZero<i32>>,
pub state: State,
/// Detail attached to the state (exit status or failure reason of a crash).
pub msg: Option<String>,
}
impl Child {
pub fn reload(&self) -> Result<()> {
self.kill(Signal::SIGHUP)
}
pub fn kill(&self, sig: Signal) -> Result<()> {
let Some(pid) = self.pid else {
return Err(Error::ProcessExited);
};
kill(Pid::from_raw(pid.into()), sig).map_err(|e| Error::KillFailed(e))
}
}