use {
super::DaemonConfig,
crate::protocol::ipc::IpcStream,
std::{
fs,
io,
time::Duration,
},
};
/// Result of probing the daemon's IPC endpoint, as produced by
/// `check_daemon_status`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DaemonStatus {
/// A connection to the daemon endpoint succeeded.
Running,
/// The endpoint was not found, or it refused the connection and the socket
/// was not judged stale.
NotRunning,
/// The endpoint refused the connection and (Unix only) the PID file is
/// missing or names a process that is not alive.
StaleSocket,
}
/// Probes the daemon's IPC endpoint and classifies the outcome.
///
/// * A successful connection yields [`DaemonStatus::Running`]; the probe
///   connection is dropped immediately.
/// * `ConnectionRefused` yields [`DaemonStatus::StaleSocket`] when, on Unix,
///   the PID-file check reports the socket as stale, and
///   [`DaemonStatus::NotRunning`] otherwise (always `NotRunning` on
///   non-Unix targets).
/// * `NotFound` yields [`DaemonStatus::NotRunning`].
///
/// # Errors
///
/// Any other connection error is returned unchanged, as is any error raised
/// while inspecting the PID file.
pub async fn check_daemon_status(
    config: &DaemonConfig,
) -> io::Result<DaemonStatus> {
    let endpoint = config.endpoint();

    // Step 1: a successful connect settles the question right away.
    let failure = match IpcStream::connect(&endpoint).await {
        | Ok(_) => return Ok(DaemonStatus::Running),
        | Err(failure) => failure,
    };

    // Step 2: classify the failure by its error kind.
    match failure.kind() {
        | io::ErrorKind::ConnectionRefused => {
            #[cfg(unix)]
            let status = if is_socket_stale(config)? {
                DaemonStatus::StaleSocket
            } else {
                DaemonStatus::NotRunning
            };
            #[cfg(not(unix))]
            let status = DaemonStatus::NotRunning;

            Ok(status)
        },
        | io::ErrorKind::NotFound => Ok(DaemonStatus::NotRunning),
        | _ => Err(failure),
    }
}
/// Decides whether a socket that refuses connections was left behind by a
/// daemon that is no longer running.
///
/// Returns `Ok(true)` when the PID file is missing or names a process that no
/// longer exists, and `Ok(false)` when that process is still present
/// (including when it exists but belongs to another user).
///
/// # Errors
///
/// Returns `InvalidData` if the PID file does not contain a positive integer,
/// or the underlying I/O error if the file exists but cannot be read.
#[cfg(unix)]
fn is_socket_stale(config: &DaemonConfig) -> io::Result<bool> {
    let pid_path = config.pid_file_path();

    // Read directly rather than `exists()` followed by a read: the file can
    // disappear between the two calls, and a missing file simply means stale.
    let pid_str = match fs::read_to_string(&pid_path) {
        | Ok(contents) => contents,
        | Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(true),
        | Err(e) => return Err(e),
    };

    let pid: i32 = pid_str
        .trim()
        .parse()
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

    // `kill(0, ..)` and `kill(-n, ..)` address process groups (or every
    // process), not the daemon, so such values can never be a valid daemon PID.
    if pid <= 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("invalid daemon pid {pid} in pid file"),
        ));
    }

    // SAFETY: `kill` with signal 0 only performs existence and permission
    // checks; it delivers no signal and touches no memory.
    if unsafe { libc::kill(pid, 0) } == 0 {
        return Ok(false);
    }

    // EPERM means the process exists but we may not signal it, so the socket
    // is not stale. Any other failure (normally ESRCH) means it is gone.
    let probe_error = io::Error::last_os_error();
    Ok(probe_error.raw_os_error() != Some(libc::EPERM))
}
/// Removes the files a dead daemon may have left behind: its socket, its PID
/// file and its lock file.
///
/// Files that are already absent are skipped silently, so the function is
/// safe to call when another process is cleaning up at the same time. A
/// `daemon_stale_socket_removed` event is emitted only when the socket file
/// was actually deleted.
///
/// # Errors
///
/// Returns the first removal error other than `NotFound`; files later in the
/// sequence are not attempted in that case.
#[cfg(unix)]
pub fn cleanup_stale_socket(config: &DaemonConfig) -> io::Result<()> {
    /// Deletes `path`, reporting whether a file was actually removed.
    ///
    /// Removing directly (instead of `exists()` followed by `remove_file`)
    /// avoids failing when the file vanishes between the two calls.
    fn remove_if_present(path: impl AsRef<std::path::Path>) -> io::Result<bool> {
        match fs::remove_file(path) {
            | Ok(()) => Ok(true),
            | Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false),
            | Err(e) => Err(e),
        }
    }

    let socket_path = config.socket_path();
    let pid_path = config.pid_file_path();
    let lock_path = config.lock_file_path();

    if remove_if_present(&socket_path)? {
        otel::event!(
            "daemon_stale_socket_removed",
            "path" = socket_path.display().to_string()
        );
    }
    remove_if_present(&pid_path)?;
    remove_if_present(&lock_path)?;
    Ok(())
}
/// Windows stub: performs no cleanup and always returns `Ok(())`.
#[cfg(windows)]
pub fn cleanup_stale_socket(_config: &DaemonConfig) -> io::Result<()> {
Ok(())
}
/// Outcome reported by `force_stop_daemon`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ForceStopOutcome {
/// The process named in the PID file was alive and was signalled; `pid` is
/// the value read from the PID file.
Stopped { pid: u32 },
/// No PID file was found, or the recorded process was not alive. The
/// Windows implementation always returns this variant.
NotRunning,
}
#[cfg(unix)]
pub fn force_stop_daemon(
config: &DaemonConfig,
) -> io::Result<ForceStopOutcome> {
let Some(pid) = read_pid_file(config)? else {
cleanup_stale_socket(config)?;
return Ok(ForceStopOutcome::NotRunning);
};
let pid_i = pid as i32;
let alive = unsafe { libc::kill(pid_i, 0) } == 0;
if !alive {
cleanup_stale_socket(config)?;
return Ok(ForceStopOutcome::NotRunning);
}
unsafe { libc::kill(pid_i, libc::SIGTERM) };
let mut terminated = false;
for _ in 0..50 {
if unsafe { libc::kill(pid_i, 0) } != 0 {
terminated = true;
break;
}
std::thread::sleep(Duration::from_millis(20));
}
if !terminated {
unsafe { libc::kill(pid_i, libc::SIGKILL) };
}
cleanup_stale_socket(config)?;
Ok(ForceStopOutcome::Stopped { pid })
}
/// Windows stub: stops nothing and always reports
/// `ForceStopOutcome::NotRunning`.
#[cfg(windows)]
pub fn force_stop_daemon(
_config: &DaemonConfig,
) -> io::Result<ForceStopOutcome> {
Ok(ForceStopOutcome::NotRunning)
}
/// Delay in milliseconds that `wait_for_daemon` sleeps before each readiness
/// probe; attempts beyond the end of the table reuse the last entry.
const BACKOFF_DELAYS_MS: [u64; 8] = [10, 20, 50, 100, 200, 500, 1000, 2000];
/// Number of readiness probes `wait_for_daemon` makes before giving up with a
/// `TimedOut` error.
const MAX_ATTEMPTS: usize = 20;
/// Waits for the daemon to start accepting connections.
///
/// Makes up to `MAX_ATTEMPTS` probes. Each probe is preceded by a sleep taken
/// from `BACKOFF_DELAYS_MS` (the last entry is reused once the table is
/// exhausted). A stale socket discovered along the way is cleaned up before
/// the next probe. A `daemon_ready` event is emitted on success.
///
/// # Errors
///
/// Returns `TimedOut` if the daemon never reports as running, or the first
/// error raised by the status probe or by the stale-socket cleanup.
pub async fn wait_for_daemon(config: &DaemonConfig) -> io::Result<()> {
    let last_delay_idx = BACKOFF_DELAYS_MS.len() - 1;

    for attempt in 0..MAX_ATTEMPTS {
        // Sleep first: the daemon was only just launched by the caller.
        let pause_ms = BACKOFF_DELAYS_MS[attempt.min(last_delay_idx)];
        smol::Timer::after(Duration::from_millis(pause_ms)).await;

        let status = check_daemon_status(config).await?;
        match status {
            | DaemonStatus::NotRunning => {},
            | DaemonStatus::StaleSocket => cleanup_stale_socket(config)?,
            | DaemonStatus::Running => {
                let ws_id = config.workspace_id.clone();
                otel::event!(
                    "daemon_ready",
                    "attempts" = attempt as i64,
                    "workspace_id" = ws_id
                );
                return Ok(());
            },
        }
    }

    let message = format!(
        "daemon failed to start after {} attempts for workspace {}",
        MAX_ATTEMPTS, config.workspace_id
    );
    Err(io::Error::new(io::ErrorKind::TimedOut, message))
}
/// Records the current process ID in the daemon's PID file.
///
/// The parent directory is created if necessary. The file receives the
/// decimal PID followed by a newline, and a `daemon_pid_file_written` event
/// is emitted afterwards.
///
/// # Errors
///
/// Returns any I/O error from creating the directory or writing the file.
#[cfg(unix)]
pub fn write_pid_file(config: &DaemonConfig) -> io::Result<()> {
    let pid_path = config.pid_file_path();
    let pid = std::process::id();

    // A path without a parent component needs no directory to be created.
    pid_path.parent().map(fs::create_dir_all).transpose()?;

    let contents = format!("{pid}\n");
    fs::write(&pid_path, contents)?;

    otel::event!(
        "daemon_pid_file_written",
        "path" = pid_path.display().to_string(),
        "pid" = i64::from(pid)
    );
    Ok(())
}
/// Windows stub: writes nothing and always returns `Ok(())`.
#[cfg(windows)]
pub fn write_pid_file(_config: &DaemonConfig) -> io::Result<()> {
Ok(())
}
/// Reads the daemon's PID from its PID file.
///
/// Returns `Ok(None)` when the PID file does not exist and `Ok(Some(pid))`
/// when it holds a decimal unsigned integer (surrounding whitespace is
/// ignored).
///
/// # Errors
///
/// Returns `InvalidData` if the contents do not parse as a `u32`, or the
/// underlying I/O error if the file exists but cannot be read.
#[cfg(unix)]
pub fn read_pid_file(config: &DaemonConfig) -> io::Result<Option<u32>> {
    let pid_path = config.pid_file_path();

    // A single read instead of `exists()` followed by a read: the daemon may
    // remove the file between the two calls, which must yield `None` rather
    // than a spurious error.
    let contents = match fs::read_to_string(&pid_path) {
        | Ok(contents) => contents,
        | Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
        | Err(e) => return Err(e),
    };

    contents
        .trim()
        .parse::<u32>()
        .map(Some)
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}
/// Windows stub: reads nothing and always returns `Ok(None)`.
#[cfg(windows)]
pub fn read_pid_file(_config: &DaemonConfig) -> io::Result<Option<u32>> {
Ok(None)
}
// Unit tests for the status enum, the backoff table, and (Unix only) the
// force-stop path against a real child process.
#[cfg(test)]
mod tests {
use super::*;
// The status variants must compare as distinct values.
#[test]
fn test_daemon_status_enum() {
assert_ne!(DaemonStatus::Running, DaemonStatus::NotRunning);
assert_ne!(DaemonStatus::NotRunning, DaemonStatus::StaleSocket);
}
// The backoff table has eight entries and is strictly increasing.
#[test]
fn test_backoff_delays() {
assert_eq!(BACKOFF_DELAYS_MS.len(), 8);
for i in 1..BACKOFF_DELAYS_MS.len() {
assert!(BACKOFF_DELAYS_MS[i] > BACKOFF_DELAYS_MS[i - 1]);
}
}
// There must be more attempts than table entries, so the final (longest)
// delay is reused at least once.
#[test]
fn test_max_attempts() {
assert!(MAX_ATTEMPTS > BACKOFF_DELAYS_MS.len());
}
// Spawns a real `sleep 60` child, points the PID file at it, and checks that
// `force_stop_daemon` terminates it and removes the PID and socket files.
#[test]
#[cfg(unix)]
fn test_force_stop_daemon_kills_process_and_cleans_up() {
// The workspace name embeds this process's ID.
let config = DaemonConfig::new(
"test-force-stop",
format!("ws-{}", std::process::id()),
);
fs::create_dir_all(config.runtime_dir()).unwrap();
let mut child = std::process::Command::new("sleep")
.arg("60")
.spawn()
.expect("spawn sleep");
let pid = child.id();
// Fake the daemon's on-disk state: a PID file naming the child and an empty
// regular file standing in for the socket.
fs::write(config.pid_file_path(), format!("{pid}\n")).unwrap();
fs::write(config.socket_path(), b"").unwrap();
// NOTE(review): the child is not reaped until `child.wait()` below, so it
// presumably still answers `kill(pid, 0)` as a zombie after SIGTERM and this
// call likely runs the full polling loop — confirm.
let outcome = force_stop_daemon(&config).expect("force_stop_daemon");
assert_eq!(outcome, ForceStopOutcome::Stopped { pid });
// Reap the child; a signal-terminated process reports a non-success status.
let status = child.wait().expect("reap child");
assert!(!status.success(), "child was not terminated: {status:?}");
assert!(!config.pid_file_path().exists(), "pidfile not cleaned up");
assert!(!config.socket_path().exists(), "socket not cleaned up");
}
// With no PID file written for this config, force-stop must report
// `NotRunning` rather than fail.
#[test]
#[cfg(unix)]
fn test_force_stop_daemon_no_pidfile_is_not_running() {
let config = DaemonConfig::new(
"test-force-stop-absent",
format!("ws-absent-{}", std::process::id()),
);
let outcome = force_stop_daemon(&config).expect("force_stop_daemon");
assert_eq!(outcome, ForceStopOutcome::NotRunning);
}
}