use log::{error, warn}; use nix::{ sys::signal::{kill, Signal}, unistd::Pid, }; use std::num::NonZero; use tokio::{ process, select, sync::{mpsc, watch}, time::{sleep, sleep_until, Duration, Instant}, }; use super::{Error, Result, Service}; use crate::logger::Logger; const LOG_PATH: &str = "/var/log"; const TERM_DELAY: Duration = Duration::from_secs(30); const KILL_DELAY: Duration = Duration::from_secs(10); const RESTART_DELAY: Duration = Duration::from_secs(8); #[derive(Debug, Clone, Copy)] pub enum Cmd { Start, Stop, } #[derive(Default, Clone, Copy, Debug)] pub enum State { #[default] NeverStarted, Starting, Running, Crashed, Stopping, Stopped, Finalized, } pub fn new( cg: impl Into, svc: impl Into, service: Service, ) -> (Runner, watch::Receiver, mpsc::Sender) { let (manager, child_rx) = ProcessManager::new(service); let (cmds_tx, cmds_rx) = mpsc::channel(1); let r = Runner { cg: cg.into(), svc: svc.into(), cmds_rx, manager, }; (r, child_rx, cmds_tx) } pub struct Runner { cg: String, svc: String, cmds_rx: mpsc::Receiver, manager: ProcessManager, } impl Runner { pub async fn run(mut self) { self.manager.update(State::NeverStarted); loop { let cmd = select! { cmd = self.manager.manage() => { cmd } cmd = self.cmds_rx.recv() => { let Some(cmd) = cmd else { break; // command side dropped }; Some(cmd) } }; if let Some(cmd) = cmd { self.process_cmd(cmd).await; } } self.process_cmd(Cmd::Stop).await; self.manager.update(State::Finalized); } async fn process_cmd(&mut self, cmd: Cmd) { let cg = &self.cg; let svc = &self.svc; match cmd { Cmd::Start => { self.manager.start(cg, svc).await; } Cmd::Stop => { self.manager.stop().await; } } } } struct ProcessManager { service: Service, child_tx: watch::Sender, process: Option, restart_deadline: Option, } impl ProcessManager { fn new(service: Service) -> (Self, watch::Receiver) { let (child_tx, child_rx) = watch::channel(Child::default()); let pm = Self { service, child_tx, process: None, restart_deadline: None, }; (pm, child_rx) } /// runs a management iteration (ie: waiting for the child or a restart deadline). async fn manage(&mut self) -> Option { if let Some(process) = self.process.as_mut() { let msg = match process.wait().await { Ok(status) => status.to_string(), Err(e) => e.to_string(), }; self.crashed(msg); self.process = None; self.restart_deadline = Some(Instant::now() + RESTART_DELAY); None } else if let Some(deadline) = self.restart_deadline { sleep_until(deadline).await; Some(Cmd::Start) } else { std::future::pending().await } } async fn start(&mut self, cg: &str, svc: &str) { if self.process.is_some() { return; } self.update(State::Starting); let logger = Logger { log_path: LOG_PATH.into(), log_name: svc.into(), with_prefix: false, cgroup: Some(cg.into()), }; let mut args = self.service.iter(); let Some(cmd) = args.next() else { error!("{cg}/{svc}: empty command"); return; }; let Ok(cmd) = (logger.setup(cmd, args).await) .inspect_err(|e| self.crashed(format!("setup failed: {e}"))) else { return; }; let Ok(child) = logger .spawn(cmd) .inspect_err(|e| self.crashed(format!("exec failed: {e}"))) else { return; }; self.process = Some(child); self.restart_deadline = None; self.update(State::Running); } async fn stop(&mut self) { self.restart_deadline = None; let Some(mut process) = self.process.take() else { return; }; let Some(pid) = process.id() else { let _ = process.wait().await; // already dead, reap it self.update(State::Stopped); return; }; let pid = pid as i32; self.update_full(pid, State::Stopping, None); let pid = Pid::from_raw(pid); let _ = kill(pid, Signal::SIGTERM).inspect_err(|e| error!("kill -TERM {pid} failed: {e}")); select! { _ = process.wait() => { self.update(State::Stopped); return }, _ = sleep(TERM_DELAY) => { warn!("process {pid} did not exit during the grace period, killing"); let _ = process.kill().await.inspect_err(|e| error!("kill -KILL {pid} failed: {e}")); } } select! { _ = process.wait() => { self.update(State::Stopped); return }, _ = sleep(KILL_DELAY) => { error!("process {pid} still alive after SIGKILL"); } } } fn process_pid(&self) -> i32 { (self.process.as_ref()) .and_then(|c| Some(c.id()? as i32)) .unwrap_or(0) } fn update(&self, state: State) { let pid = self.process_pid(); self.update_full(pid, state, None); } fn update_full(&self, pid: i32, state: State, msg: Option) { self.child_tx.send_replace(Child { pid: NonZero::new(pid), state, msg, }); } fn crashed(&self, msg: String) { let pid = self.process_pid(); self.update_full(pid, State::Crashed, Some(msg)); } } #[derive(Clone, Default)] pub struct Child { pub pid: Option>, pub state: State, pub msg: Option, } impl Child { pub fn reload(&self) -> Result<()> { self.kill(Signal::SIGHUP) } pub fn kill(&self, sig: Signal) -> Result<()> { let Some(pid) = self.pid else { return Err(Error::ProcessExited); }; kill(Pid::from_raw(pid.into()), sig).map_err(|e| Error::KillFailed(e)) } }