Skip to main content

synwire_daemon/
lifecycle.rs

1//! Daemon lifecycle management: PID file, Unix domain socket, grace period,
2//! signal handling, and cleanup.
3//!
4//! This module contains all the logic for the daemon's lifecycle:
5//!
6//! - **PID file** at `StorageLayout::daemon_pid_file()` prevents duplicate
7//!   instances and allows clients to detect a running daemon.
8//! - **Unix domain socket** at `StorageLayout::daemon_socket()` accepts
9//!   connections from MCP server proxies.
10//! - **Grace period** (5 minutes) triggers automatic shutdown when no clients
11//!   are connected.
12//! - **Signal handling** catches SIGTERM/SIGINT for clean shutdown.
13
14use std::path::Path;
15use std::sync::Arc;
16use std::sync::atomic::{AtomicUsize, Ordering};
17
18use tokio::sync::Notify;
19use tracing::{error, info, warn};
20
/// How long the daemon keeps running with zero connected clients before it
/// shuts itself down (five minutes).
pub const GRACE_PERIOD: std::time::Duration = {
    const MINUTES: u64 = 5;
    std::time::Duration::from_secs(MINUTES * 60)
};
24
25// ── Main entry point ────────────────────────────────────────────────────────
26
27/// Run the daemon's accept loop and grace-period shutdown logic.
28///
29/// Binds a Unix domain socket at `sock_path`, accepts client connections, and
30/// monitors for shutdown conditions (signal or grace-period expiry). Returns
31/// `0` on clean shutdown, `1` on error.
32pub async fn run_daemon(sock_path: &Path, pid_path: &Path) -> i32 {
33    // Remove a stale socket from a previous run.
34    remove_stale_socket(sock_path);
35
36    let Some(listener) = bind_listener(sock_path) else {
37        cleanup(pid_path, sock_path);
38        return 1;
39    };
40
41    info!(path = %sock_path.display(), "listening on Unix domain socket");
42
43    let client_count = Arc::new(AtomicUsize::new(0));
44    let client_changed = Arc::new(Notify::new());
45
46    // Spawn the accept loop.
47    let accept_clients = client_count.clone();
48    let accept_notify = client_changed.clone();
49    let accept_handle = tokio::spawn(async move {
50        accept_loop(listener, accept_clients, accept_notify).await;
51    });
52
53    // Wait for a shutdown signal or the grace-period to expire.
54    let reason = shutdown_monitor(client_count, client_changed).await;
55    info!(reason = %reason, "initiating shutdown");
56
57    // Cancel the accept loop.
58    accept_handle.abort();
59    let _ = accept_handle.await;
60
61    0
62}
63
64// ── Accept loop ─────────────────────────────────────────────────────────────
65
66/// Accept incoming Unix-socket connections, tracking the active client count.
67#[cfg(unix)]
68async fn accept_loop(
69    listener: tokio::net::UnixListener,
70    client_count: Arc<AtomicUsize>,
71    client_changed: Arc<Notify>,
72) {
73    loop {
74        let (stream, _addr) = match listener.accept().await {
75            Ok(pair) => pair,
76            Err(e) => {
77                warn!("Failed to accept connection: {e}");
78                continue;
79            }
80        };
81
82        let count = client_count.clone();
83        let notify = client_changed.clone();
84
85        let prev = count.fetch_add(1, Ordering::SeqCst);
86        info!(clients = prev + 1, "client connected");
87        notify.notify_waiters();
88
89        // Spawn a task per client.  For now the client simply holds the
90        // connection open -- no IPC protocol is implemented yet (future phase).
91        let _handle: tokio::task::JoinHandle<()> = tokio::spawn(async move {
92            handle_client(stream).await;
93
94            let prev = count.fetch_sub(1, Ordering::SeqCst);
95            info!(clients = prev - 1, "client disconnected");
96            notify.notify_waiters();
97        });
98    }
99}
100
101/// Placeholder per-client handler.  Waits until the remote end closes the
102/// connection (or the task is cancelled).
103#[cfg(unix)]
104async fn handle_client(stream: tokio::net::UnixStream) {
105    // Wait for the readable half to signal EOF (remote close).
106    let _ = stream.readable().await;
107    // Drain -- the stream may become readable multiple times before true EOF.
108    let mut buf = [0u8; 1024];
109    loop {
110        match stream.try_read(&mut buf) {
111            // EOF or error (including WouldBlock) -- treat as disconnect.
112            Ok(0) | Err(_) => break,
113            Ok(_) => {
114                // Discard data; no IPC protocol yet.
115                let _ = stream.readable().await;
116            }
117        }
118    }
119}
120
/// Stand-in accept loop for platforms without Unix domain sockets.
///
/// Ignores all of its arguments and never completes, so the shared client
/// counter is never modified by this task.
#[cfg(not(unix))]
async fn accept_loop(_listener: (), _client_count: Arc<AtomicUsize>, _client_changed: Arc<Notify>) {
    // There is nothing to accept here; park the task until it is aborted.
    let never = std::future::pending::<()>();
    never.await;
}
127
128// ── Shutdown monitoring ─────────────────────────────────────────────────────
129
130/// Wait for either a termination signal or grace-period expiry.
131///
132/// Returns a human-readable reason string describing why shutdown was
133/// initiated.
134async fn shutdown_monitor(
135    client_count: Arc<AtomicUsize>,
136    client_changed: Arc<Notify>,
137) -> &'static str {
138    // Start with the grace period running (no clients yet).
139    loop {
140        let clients = client_count.load(Ordering::SeqCst);
141
142        if clients > 0 {
143            // While clients are connected, just wait for a signal or a client
144            // change event.
145            tokio::select! {
146                () = shutdown_signal() => return "received shutdown signal",
147                () = client_changed.notified() => {}
148            }
149        } else {
150            // No clients -- start the grace-period timer.
151            info!(
152                grace_secs = GRACE_PERIOD.as_secs(),
153                "no active clients, grace period started"
154            );
155
156            tokio::select! {
157                () = shutdown_signal() => return "received shutdown signal",
158                () = tokio::time::sleep(GRACE_PERIOD) => {
159                    // Recheck: a client might have connected just before the
160                    // timer fired.
161                    if client_count.load(Ordering::SeqCst) == 0 {
162                        return "grace period expired with no clients";
163                    }
164                    // Clients reconnected -- loop around.
165                }
166                () = client_changed.notified() => {}
167            }
168        }
169    }
170}
171
172/// Wait for SIGTERM or SIGINT (Unix) / Ctrl-C (all platforms).
173async fn shutdown_signal() {
174    #[cfg(unix)]
175    {
176        use tokio::signal::unix::{SignalKind, signal};
177        let mut sigterm = signal(SignalKind::terminate()).unwrap_or_else(|e| {
178            // We cannot recover from this, but we also cannot panic.
179            error!("Failed to register SIGTERM handler: {e}");
180            std::process::exit(1);
181        });
182        let mut sigint = signal(SignalKind::interrupt()).unwrap_or_else(|e| {
183            error!("Failed to register SIGINT handler: {e}");
184            std::process::exit(1);
185        });
186        tokio::select! {
187            () = async { let _ = sigterm.recv().await; } => {}
188            () = async { let _ = sigint.recv().await; } => {}
189        }
190    }
191
192    #[cfg(not(unix))]
193    {
194        let _ = tokio::signal::ctrl_c().await;
195    }
196}
197
198// ── PID file helpers ────────────────────────────────────────────────────────
199
200/// Check whether an existing PID file points to a running process.
201///
202/// Returns `Ok(())` if no daemon is running (or the PID file is stale).
203/// Returns `Err(message)` if another daemon is still alive.
204pub fn check_existing_daemon(pid_path: &Path) -> Result<(), String> {
205    let contents = match std::fs::read_to_string(pid_path) {
206        Ok(c) => c,
207        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
208        Err(e) => {
209            return Err(format!(
210                "Unable to read PID file {}: {e}",
211                pid_path.display()
212            ));
213        }
214    };
215
216    let Ok(pid) = contents.trim().parse::<u32>() else {
217        warn!(
218            path = %pid_path.display(),
219            contents = %contents.trim(),
220            "Removing PID file with invalid contents"
221        );
222        let _ = std::fs::remove_file(pid_path);
223        return Ok(());
224    };
225
226    if is_process_alive(pid) {
227        return Err(format!(
228            "Another synwire-daemon is already running (PID {pid})"
229        ));
230    }
231
232    // Stale PID file -- the process is gone.
233    warn!(pid, "Removing stale PID file for dead process");
234    let _ = std::fs::remove_file(pid_path);
235    Ok(())
236}
237
/// Write the current process ID to the PID file.
///
/// The PID, followed by a newline, is written to a sibling temporary file
/// (`<name>.<pid>.tmp`) and then renamed over `pid_path`.  Writing in place
/// would truncate the file first, so a concurrent reader could observe an
/// empty PID file, consider it invalid and delete it; with the rename a
/// reader sees either the previous contents or the complete new contents.
///
/// # Errors
///
/// Returns the underlying I/O error if the temporary file cannot be written
/// or the rename fails.  The temporary file is removed on a best-effort basis
/// in that case.
pub fn write_pid_file(pid_path: &Path) -> std::io::Result<()> {
    let pid = std::process::id();
    let contents = format!("{pid}\n");

    // A path without a final component has no sibling to stage the write in;
    // fall back to a direct write and let it report the error, if any.
    let Some(file_name) = pid_path.file_name() else {
        return std::fs::write(pid_path, contents);
    };

    // Include our own PID in the temporary name so that two daemons starting
    // at the same time do not share a staging file.
    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(".{pid}.tmp"));
    let tmp_path = pid_path.with_file_name(tmp_name);

    let outcome = std::fs::write(&tmp_path, contents)
        .and_then(|()| std::fs::rename(&tmp_path, pid_path));
    if outcome.is_err() {
        // Do not leave the staging file behind when anything went wrong.
        let _ = std::fs::remove_file(&tmp_path);
    }
    outcome
}
242
243/// Check whether a process with the given PID is alive.
244#[cfg(unix)]
245fn is_process_alive(pid: u32) -> bool {
246    // kill(pid, 0) checks existence without sending a signal.
247    nix::sys::signal::kill(
248        nix::unistd::Pid::from_raw(i32::try_from(pid).unwrap_or(0)),
249        None,
250    )
251    .is_ok()
252}
253
/// Liveness check for platforms without a Unix-style `kill`.
///
/// Always answers `true`, whatever PID is passed.
#[cfg(not(unix))]
fn is_process_alive(_pid: u32) -> bool {
    // There is no probe available here, so err on the side of "still
    // running".  A user who knows better can delete the PID file by hand.
    const ASSUME_ALIVE: bool = true;
    ASSUME_ALIVE
}
261
262// ── Socket helpers ──────────────────────────────────────────────────────────
263
264/// Remove a leftover socket file from a previous daemon run.
265fn remove_stale_socket(sock_path: &Path) {
266    if sock_path.exists() {
267        warn!(path = %sock_path.display(), "Removing stale daemon socket");
268        let _ = std::fs::remove_file(sock_path);
269    }
270}
271
272/// Bind a `UnixListener` on the given path, returning `None` on failure.
273#[cfg(unix)]
274fn bind_listener(sock_path: &Path) -> Option<tokio::net::UnixListener> {
275    match tokio::net::UnixListener::bind(sock_path) {
276        Ok(l) => {
277            // Restrict socket permissions to owner only.
278            use std::os::unix::fs::PermissionsExt;
279            let perms = std::fs::Permissions::from_mode(0o600);
280            if let Err(e) = std::fs::set_permissions(sock_path, perms) {
281                warn!(
282                    path = %sock_path.display(),
283                    "Failed to restrict socket permissions: {e}"
284                );
285            }
286            Some(l)
287        }
288        Err(e) => {
289            error!(path = %sock_path.display(), "Failed to bind Unix socket: {e}");
290            None
291        }
292    }
293}
294
/// Stand-in for the listener bind on platforms without Unix domain sockets.
///
/// Nothing is bound: a warning naming `sock_path` is logged and `Some(())`
/// is returned so that the caller proceeds as if binding had succeeded.
#[cfg(not(unix))]
fn bind_listener(sock_path: &Path) -> Option<()> {
    let shown = sock_path.display();
    warn!(
        path = %shown,
        "Unix domain sockets are not supported on this platform; running without IPC listener"
    );
    Some(())
}
304
305// ── Cleanup ─────────────────────────────────────────────────────────────────
306
307/// Best-effort removal of the PID file and socket.
308pub fn cleanup(pid_path: &Path, sock_path: &Path) {
309    if let Err(e) = std::fs::remove_file(pid_path)
310        && e.kind() != std::io::ErrorKind::NotFound
311    {
312        warn!(path = %pid_path.display(), "Failed to remove PID file: {e}");
313    }
314    if let Err(e) = std::fs::remove_file(sock_path)
315        && e.kind() != std::io::ErrorKind::NotFound
316    {
317        warn!(path = %sock_path.display(), "Failed to remove socket: {e}");
318    }
319}