// oxios_kernel/daemon.rs

//! Daemon lifecycle management — PID file, start/stop, system service install.
//!
//! On macOS: launchd (`~/Library/LaunchAgents/com.a7garden.oxios.plist`)
//! On Linux: systemd (`/etc/systemd/system/oxiosd.service`)
5
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
8
/// Daemon status, as derived from the PID file and a liveness probe.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare states directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DaemonStatus {
    /// Daemon is running.
    Running {
        /// Process ID.
        pid: u32,
    },
    /// PID file exists but process is dead (stale).
    Stale {
        /// Process ID of the dead process.
        pid: u32,
    },
    /// Daemon is not running.
    Stopped,
}
25
26impl std::fmt::Display for DaemonStatus {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            DaemonStatus::Running { pid } => write!(f, "running (PID {pid})"),
30            DaemonStatus::Stale { pid } => write!(f, "stale (PID {pid} dead)"),
31            DaemonStatus::Stopped => write!(f, "stopped"),
32        }
33    }
34}
35
/// Manages the oxios background daemon.
///
/// Holds only the two filesystem locations the manager works with; it owns no
/// process handle, so it is cheap to clone and safe to print for diagnostics.
#[derive(Debug, Clone)]
pub struct DaemonManager {
    /// File that records the PID of the running daemon.
    pid_file: PathBuf,
    /// Directory that receives the daemon's log files.
    log_dir: PathBuf,
}
41
42impl DaemonManager {
43    /// Create a daemon manager from config paths.
44    pub fn new(pid_file: &str, log_dir: &str) -> Self {
45        Self {
46            pid_file: crate::config::expand_home(pid_file),
47            log_dir: crate::config::expand_home(log_dir),
48        }
49    }
50
51    /// Check daemon status by reading the PID file.
52    pub fn status(&self) -> DaemonStatus {
53        match self.read_pid() {
54            Some(pid) => {
55                if self.is_alive(pid) {
56                    DaemonStatus::Running { pid }
57                } else {
58                    DaemonStatus::Stale { pid }
59                }
60            }
61            None => DaemonStatus::Stopped,
62        }
63    }
64
65    /// Start the daemon in the background and wait for it to begin accepting
66    /// connections on `port` (RFC-024 SP4: verifies the listener came up so
67    /// a port-bind failure is reported immediately instead of masked by a
68    /// `started` message that never resolves).
69    pub fn start(&self, config_path: &Path, port: u16) -> Result<()> {
70        match self.status() {
71            DaemonStatus::Running { pid } => {
72                anyhow::bail!("oxios is already running (PID {pid})");
73            }
74            DaemonStatus::Stale { .. } => {
75                self.cleanup()?;
76            }
77            DaemonStatus::Stopped => {}
78        }
79
80        // Ensure log directory exists
81        std::fs::create_dir_all(&self.log_dir).context("failed to create log directory")?;
82
83        let log_file = self.log_dir.join("oxios.log");
84        let exe = std::env::current_exe().context("failed to locate oxios binary")?;
85
86        let child = std::process::Command::new(&exe)
87            .arg("--foreground")
88            .arg("--config")
89            .arg(config_path)
90            .stdout(std::fs::File::create(&log_file)?)
91            .stderr(std::fs::File::create(&log_file)?)
92            .spawn()
93            .context("failed to spawn oxios daemon")?;
94
95        let pid = child.id();
96        self.write_pid(pid)?;
97
98        println!("⬡ oxios started (PID {pid})");
99        println!("  Logs: {}", log_file.display());
100        println!("  Dashboard: http://127.0.0.1:{port}");
101
102        // RFC-024 SP4: verify the daemon is actually accepting connections.
103        // A misconfigured bind (TIME_WAIT, port in use) used to be invisible
104        // here — the user saw `started` but `curl` got connection refused.
105        match self.wait_until_listening(port, std::time::Duration::from_secs(15)) {
106            Ok(()) => println!("  Status:   ready (listening on :{port})"),
107            Err(_) => {
108                println!("  Status:   still warming up (did not respond on :{port} within 15s)")
109            }
110        }
111        Ok(())
112    }
113
114    /// Poll `127.0.0.1:port` until a TCP connect succeeds or `timeout` elapses.
115    fn wait_until_listening(&self, port: u16, timeout: std::time::Duration) -> Result<()> {
116        use std::net::ToSocketAddrs;
117        let addr = format!("127.0.0.1:{port}")
118            .to_socket_addrs()?
119            .next()
120            .ok_or_else(|| anyhow::anyhow!("invalid bind address 127.0.0.1:{port}"))?;
121        let start = std::time::Instant::now();
122        let interval = std::time::Duration::from_millis(200);
123        while start.elapsed() < timeout {
124            if std::net::TcpStream::connect_timeout(&addr, interval).is_ok() {
125                return Ok(());
126            }
127            std::thread::sleep(interval);
128        }
129        anyhow::bail!("daemon did not start listening on :{port} within {timeout:?}")
130    }
131
132    /// Stop the daemon by sending SIGTERM.
133    pub fn stop(&self) -> Result<()> {
134        match self.status() {
135            DaemonStatus::Running { pid } => {
136                #[cfg(unix)]
137                {
138                    let ret = unsafe { libc::kill(pid as i32, libc::SIGTERM) };
139                    if ret != 0 {
140                        anyhow::bail!("failed to send SIGTERM to PID {pid}");
141                    }
142                }
143                #[cfg(not(unix))]
144                {
145                    // On non-Unix, just kill the process
146                    let _ = std::process::Command::new("taskkill")
147                        .args(["/PID", &pid.to_string(), "/F"])
148                        .output();
149                }
150
151                // Wait briefly for process to die
152                for _ in 0..10 {
153                    std::thread::sleep(std::time::Duration::from_millis(200));
154                    if !self.is_alive(pid) {
155                        break;
156                    }
157                }
158
159                self.cleanup()?;
160                println!("⬡ oxios stopped");
161                Ok(())
162            }
163            DaemonStatus::Stale { .. } => {
164                self.cleanup()?;
165                println!("⬡ cleaned up stale PID file");
166                Ok(())
167            }
168            DaemonStatus::Stopped => {
169                println!("⬡ oxios is not running");
170                Ok(())
171            }
172        }
173    }
174
175    /// Restart the daemon.
176    pub fn restart(&self, config_path: &Path, port: u16) -> Result<()> {
177        if matches!(self.status(), DaemonStatus::Running { .. }) {
178            self.stop()?;
179            std::thread::sleep(std::time::Duration::from_millis(500));
180        }
181        self.start(config_path, port)
182    }
183
184    /// Install as a system service (launchd on macOS, systemd on Linux).
185    pub fn install_service(&self) -> Result<()> {
186        let exe = std::env::current_exe().context("failed to locate oxios binary")?;
187
188        #[cfg(target_os = "macos")]
189        {
190            let plist_dir = dirs::home_dir()
191                .map(|h| h.join("Library/LaunchAgents"))
192                .context("failed to locate LaunchAgents directory")?;
193            std::fs::create_dir_all(&plist_dir)?;
194            let plist_path = plist_dir.join("com.a7garden.oxios.plist");
195
196            let home = dirs::home_dir().context("failed to get HOME")?;
197            let log_path = self.log_dir.join("oxiosd.log");
198
199            let plist = format!(
200                r#"<?xml version="1.0" encoding="UTF-8"?>
201<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
202<plist version="1.0">
203<dict>
204    <key>Label</key>
205    <string>com.a7garden.oxios</string>
206    <key>ProgramArguments</key>
207    <array>
208        <string>{exe}</string>
209        <string>--foreground</string>
210    </array>
211    <key>RunAtLoad</key>
212    <true/>
213    <key>KeepAlive</key>
214    <true/>
215    <key>StandardOutPath</key>
216    <string>{log}</string>
217    <key>StandardErrorPath</key>
218    <string>{log}</string>
219    <key>WorkingDirectory</key>
220    <string>{home}</string>
221</dict>
222</plist>
223"#,
224                exe = exe.display(),
225                log = log_path.display(),
226                home = home.display(),
227            );
228
229            std::fs::write(&plist_path, &plist)?;
230            println!("✓ Installed launchd service");
231            println!("  {}", plist_path.display());
232            println!();
233            println!("  Start with:   launchctl load {}", plist_path.display());
234            println!("  Stop with:    launchctl unload {}", plist_path.display());
235            println!("  Or simply:    oxios start / oxios stop");
236        }
237
238        #[cfg(target_os = "linux")]
239        {
240            let unit_dir = PathBuf::from("/etc/systemd/system");
241            let unit_path = unit_dir.join("oxiosd.service");
242
243            let unit = format!(
244                r#"[Unit]
245Description=Oxios Agent Operating System
246After=network.target
247
248[Service]
249Type=simple
250ExecStart={exe} --foreground
251Restart=on-failure
252RestartSec=5s
253
254[Install]
255WantedBy=multi-user.target
256"#,
257                exe = exe.display(),
258            );
259
260            // Try to write — may fail without sudo
261            if let Err(e) = std::fs::write(&unit_path, &unit) {
262                anyhow::bail!(
263                    "Failed to write {} — run with sudo: {}",
264                    unit_path.display(),
265                    e
266                );
267            }
268
269            println!("✓ Installed systemd service");
270            println!("  {}", unit_path.display());
271            println!();
272            println!("  Reload:  sudo systemctl daemon-reload");
273            println!("  Start:   sudo systemctl start oxiosd");
274            println!("  Enable:  sudo systemctl enable oxiosd");
275        }
276
277        #[cfg(not(any(target_os = "macos", target_os = "linux")))]
278        {
279            anyhow::bail!("daemon install only supported on macOS and Linux");
280        }
281
282        Ok(())
283    }
284
285    /// Uninstall the system service.
286    pub fn uninstall_service(&self) -> Result<()> {
287        #[cfg(target_os = "macos")]
288        {
289            let plist_path = dirs::home_dir()
290                .map(|h| h.join("Library/LaunchAgents/com.a7garden.oxios.plist"))
291                .context("failed to locate plist")?;
292
293            if plist_path.exists() {
294                std::fs::remove_file(&plist_path)?;
295                println!("✓ Removed launchd service");
296            } else {
297                println!("  Service not installed");
298            }
299        }
300
301        #[cfg(target_os = "linux")]
302        {
303            let unit_path = PathBuf::from("/etc/systemd/system/oxiosd.service");
304            if unit_path.exists() {
305                if let Err(e) = std::fs::remove_file(&unit_path) {
306                    anyhow::bail!(
307                        "Failed to remove {} — run with sudo: {}",
308                        unit_path.display(),
309                        e
310                    );
311                }
312                println!("✓ Removed systemd service");
313            } else {
314                println!("  Service not installed");
315            }
316        }
317
318        #[cfg(not(any(target_os = "macos", target_os = "linux")))]
319        {
320            anyhow::bail!("daemon uninstall only supported on macOS and Linux");
321        }
322
323        Ok(())
324    }
325
326    // ── Internal helpers ──
327
328    fn read_pid(&self) -> Option<u32> {
329        let content = std::fs::read_to_string(&self.pid_file).ok()?;
330        content.trim().parse().ok()
331    }
332
333    fn write_pid(&self, pid: u32) -> Result<()> {
334        if let Some(parent) = self.pid_file.parent() {
335            std::fs::create_dir_all(parent)?;
336        }
337        std::fs::write(&self.pid_file, pid.to_string())?;
338        Ok(())
339    }
340
341    fn cleanup(&self) -> Result<()> {
342        if self.pid_file.exists() {
343            std::fs::remove_file(&self.pid_file)?;
344        }
345        Ok(())
346    }
347
348    fn is_alive(&self, pid: u32) -> bool {
349        #[cfg(unix)]
350        {
351            // Signal 0 = check if process exists
352            unsafe { libc::kill(pid as i32, 0) == 0 }
353        }
354        #[cfg(not(unix))]
355        {
356            // On non-Unix, always return false (conservative)
357            let _ = pid;
358            false
359        }
360    }
361}