Skip to main content

sqry_daemon/lifecycle/
signals.rs

1//! Signal handler installation for the sqryd daemon.
2//!
3//! # Signal model and async-signal safety
4//!
5//! All signal handling here is built on [`tokio::signal`].  Tokio registers an
6//! **async-signal-safe** forwarder at the OS level: on delivery the OS calls a
7//! one-line C trampoline that writes a single byte to an internal self-pipe
8//! (`write(2)` is async-signal-safe per POSIX).  The resulting async `Stream`
9//! is polled exclusively from normal Tokio task context — ordinary async Rust,
10//! not a raw `libc` signal handler.  There is no `SA_RESETHAND`, no global
11//! mutable signal state touched by this module, and no calls to non-async-
12//! signal-safe functions from within the signal-delivery path.
13//!
14//! # Signals handled
15//!
16//! | Signal | Platform | Action |
17//! |--------|----------|--------|
18//! | `SIGTERM` | Unix | Logs at `INFO`, cancels the shutdown token. |
19//! | `SIGINT`  | Unix | Logs at `INFO`, cancels the shutdown token. |
20//! | `SIGHUP`  | Unix | Logs at `WARN` ("treating as graceful shutdown per Task 9 §B.4"), cancels the shutdown token. |
//! | Ctrl-C    | Windows | Logs at `INFO`, cancels the shutdown token. |
22//!
23//! Hot-reload on `SIGHUP` is explicitly **out of scope** for Task 9 (§B.4 of
24//! the design).  `SIGHUP` triggers a graceful shutdown identical to `SIGTERM`.
25//!
26//! # Usage
27//!
28//! ```ignore
29//! use tokio_util::sync::CancellationToken;
30//! use sqry_daemon::lifecycle::signals::install_signal_handlers;
31//!
32//! let shutdown = CancellationToken::new();
33//! let _guard = install_signal_handlers(shutdown.clone())?;
34//! // …
35//! shutdown.cancelled().await; // wakes on SIGTERM / SIGINT / SIGHUP / Ctrl-C
36//! ```
37//!
38//! The returned [`SignalGuard`] **must be kept alive** for the duration of the
39//! server's run loop.  Dropping it aborts the signal-listener tasks so that
40//! further deliveries of `SIGTERM` / `SIGINT` / `SIGHUP` are no longer
41//! forwarded to the shutdown token.  Note: Tokio installs a process-wide OS
42//! signal handler for each registered signal kind and that handler is **not**
43//! removed when individual listeners are dropped; the OS does not revert to
44//! `SIG_DFL` after the guard is dropped.
45//!
46//! # Design reference
47//!
48//! `docs/reviews/sqryd-daemon/2026-04-19/task-9-design_iter3_request.md`
49//! §M (signal handling, n7 fix), §B.4 (SIGHUP = graceful shutdown).
50
51use tokio_util::sync::CancellationToken;
52
53#[cfg(unix)]
54use crate::error::DaemonError;
55use crate::error::DaemonResult;
56
57/// RAII guard that keeps the signal-listener tasks alive.
58///
59/// Created by [`install_signal_handlers`].  Dropping this guard aborts every
60/// signal-listener task spawned during installation, stopping them from
61/// forwarding further signal deliveries to the shutdown token.
62///
63/// **Important:** Tokio installs a process-wide OS signal handler for each
64/// registered signal kind and that handler is **not** removed when the
65/// listener is dropped.  The OS does **not** revert to `SIG_DFL` after the
66/// guard is dropped.  What changes is that subsequent deliveries are no
67/// longer forwarded to *this* guard's shutdown token; any other active
68/// `tokio::signal` listener in the process could still observe them.  Keep
69/// the guard alive for the entire lifetime of the server's run loop to ensure
70/// graceful-shutdown signals are processed by sqryd.
71#[derive(Debug)]
72#[must_use = "SignalGuard must be kept alive for signal handling to remain active"]
73pub struct SignalGuard {
74    /// Abort handles for each spawned signal-listener task.
75    ///
76    /// Stored as `JoinHandle` so that `abort()` is called on each during
77    /// `Drop`.  We do not `.await` the handles — the abort is fire-and-forget.
78    handles: Vec<tokio::task::JoinHandle<()>>,
79}
80
81impl Drop for SignalGuard {
82    fn drop(&mut self) {
83        for handle in self.handles.drain(..) {
84            handle.abort();
85        }
86    }
87}
88
89/// Install OS signal handlers and wire them to `shutdown`.
90///
91/// On **Unix** this registers listeners for `SIGTERM`, `SIGINT`, and `SIGHUP`.
92/// On **Windows** this registers a `Ctrl-C` listener via
93/// [`tokio::signal::ctrl_c`].
94///
95/// Each signal listener runs in its own Tokio task.  When a signal is
96/// delivered, the task logs the event and calls
97/// [`CancellationToken::cancel`] on `shutdown`.  Because
98/// [`CancellationToken::cancel`] is idempotent, receiving multiple signals
99/// (e.g. two rapid `SIGTERM`s) is safe and produces only a single
100/// cancellation.
101///
102/// # Errors
103///
104/// Returns [`DaemonError::SignalSetup`] if any signal stream cannot be
105/// registered.  This can happen in highly restricted containers (e.g. when
106/// `sigaction(2)` returns `ENOSYS`) or when tokio's signal back-end fails to
107/// initialise its self-pipe.
108pub fn install_signal_handlers(shutdown: CancellationToken) -> DaemonResult<SignalGuard> {
109    let handles = register_handlers(shutdown)?;
110    Ok(SignalGuard { handles })
111}
112
113// ── Platform-specific registration ───────────────────────────────────────────
114
115/// Unix: register SIGTERM, SIGINT, SIGHUP listeners.
116#[cfg(unix)]
117fn register_handlers(
118    shutdown: CancellationToken,
119) -> DaemonResult<Vec<tokio::task::JoinHandle<()>>> {
120    use tokio::signal::unix::{SignalKind, signal};
121
122    let mut sigterm =
123        signal(SignalKind::terminate()).map_err(|e| DaemonError::SignalSetup { source: e })?;
124    let mut sigint =
125        signal(SignalKind::interrupt()).map_err(|e| DaemonError::SignalSetup { source: e })?;
126    let mut sighup =
127        signal(SignalKind::hangup()).map_err(|e| DaemonError::SignalSetup { source: e })?;
128
129    let shutdown_term = shutdown.clone();
130    let h_term = tokio::spawn(async move {
131        loop {
132            // Signal::recv() returns None if the signal driver closes (rare,
133            // but possible in certain runtime teardown scenarios).  Break to
134            // avoid a busy-spin; the shutdown token will be cancelled on next
135            // delivery in the normal path.
136            if sigterm.recv().await.is_none() {
137                tracing::warn!("SIGTERM signal driver closed — listener exiting");
138                break;
139            }
140            tracing::info!("received SIGTERM — initiating graceful shutdown");
141            shutdown_term.cancel();
142        }
143    });
144
145    let shutdown_int = shutdown.clone();
146    let h_int = tokio::spawn(async move {
147        loop {
148            if sigint.recv().await.is_none() {
149                tracing::warn!("SIGINT signal driver closed — listener exiting");
150                break;
151            }
152            tracing::info!("received SIGINT — initiating graceful shutdown");
153            shutdown_int.cancel();
154        }
155    });
156
157    let h_hup = tokio::spawn(async move {
158        loop {
159            if sighup.recv().await.is_none() {
160                tracing::warn!("SIGHUP signal driver closed — listener exiting");
161                break;
162            }
163            tracing::warn!(
164                "received SIGHUP — treating as graceful shutdown per Task 9 §B.4 \
165                 (hot-reload is out of scope)"
166            );
167            shutdown.cancel();
168        }
169    });
170
171    Ok(vec![h_term, h_int, h_hup])
172}
173
/// Non-Unix implementation (Windows in practice): a single `Ctrl-C` listener.
///
/// The spawned task awaits [`tokio::signal::ctrl_c`] over and over.  Every
/// successful completion is logged at `INFO` and cancels `shutdown`.  If the
/// await fails, the error is logged at `WARN` and the task stops listening;
/// the failure is not surfaced to the caller, so this function always returns
/// `Ok` with exactly one handle.
#[cfg(not(unix))]
fn register_handlers(
    shutdown: CancellationToken,
) -> DaemonResult<Vec<tokio::task::JoinHandle<()>>> {
    let ctrl_c_task = tokio::spawn(async move {
        loop {
            match tokio::signal::ctrl_c().await {
                Ok(()) => {
                    tracing::info!("received Ctrl-C — initiating graceful shutdown");
                    shutdown.cancel();
                }
                Err(e) => {
                    tracing::warn!("ctrl-c listener error: {e} — stopping Ctrl-C handler");
                    break;
                }
            }
        }
    });

    Ok(vec![ctrl_c_task])
}
192
193// ── Tests ─────────────────────────────────────────────────────────────────────
194
#[cfg(test)]
mod tests {
    // Only the Unix tests wait with a timeout; gating the import keeps
    // non-Unix test builds free of `unused_imports` warnings.
    #[cfg(unix)]
    use std::time::Duration;

    use tokio_util::sync::CancellationToken;

    use super::install_signal_handlers;

    // ── helpers (Unix only) ──────────────────────────────────────────────

    /// Wait for `token` to be cancelled, with a generous timeout so that slow
    /// CI machines do not produce spurious failures.
    ///
    /// Returns `true` if the token was cancelled within five seconds and
    /// `false` if the timeout elapsed first.
    #[cfg(unix)]
    async fn wait_cancelled(token: &CancellationToken) -> bool {
        tokio::time::timeout(Duration::from_secs(5), token.cancelled())
            .await
            .is_ok()
    }

    /// Send signal `signum` to the current process and assert that `kill(2)`
    /// reported success.
    ///
    /// `name` is the human-readable signal name used in the assertion
    /// message.
    ///
    /// Callers must have called `install_signal_handlers` beforehand: with
    /// Tokio's forwarder registered the signal is routed to the listener
    /// tasks, whereas the default disposition of these signals would
    /// terminate the test process.
    #[cfg(unix)]
    fn send_signal_to_self(signum: libc::c_int, name: &str) {
        // SAFETY: `getpid()` has no preconditions and returns the calling
        // process's PID; `kill(pid, signum)` takes plain integers, touches no
        // memory owned by Rust, and signals only this process.
        let rc = unsafe { libc::kill(libc::getpid(), signum) };
        assert_eq!(rc, 0, "kill(getpid(), {name}) must succeed");
    }

    /// Shared body of the per-signal tests: install the handlers on a fresh
    /// token, deliver `signum` to this process, and require that the token is
    /// cancelled before the timeout.
    #[cfg(unix)]
    async fn assert_signal_cancels(signum: libc::c_int, name: &str) {
        let shutdown = CancellationToken::new();
        let _guard = install_signal_handlers(shutdown.clone())
            .expect("install_signal_handlers must succeed");

        // Let the freshly spawned listener tasks get polled once before the
        // signal is sent.  (The signal streams themselves were already
        // registered synchronously inside `install_signal_handlers`.)
        tokio::task::yield_now().await;

        send_signal_to_self(signum, name);

        assert!(
            wait_cancelled(&shutdown).await,
            "shutdown token must be cancelled after {name}"
        );
    }

    // ── Unix signal tests ─────────────────────────────────────────────────

    /// SIGTERM must cancel the shutdown token.
    ///
    /// The signal is sent to the current process via `kill(getpid(),
    /// SIGTERM)`; the listener task receives it through Tokio's signal
    /// back-end and calls `shutdown.cancel()`.
    #[cfg(unix)]
    #[tokio::test]
    async fn sigterm_triggers_cancellation_token() {
        assert_signal_cancels(libc::SIGTERM, "SIGTERM").await;
    }

    /// SIGINT must cancel the shutdown token.
    #[cfg(unix)]
    #[tokio::test]
    async fn sigint_triggers_cancellation_token() {
        assert_signal_cancels(libc::SIGINT, "SIGINT").await;
    }

    /// SIGHUP must cancel the shutdown token.
    ///
    /// The SIGHUP listener emits a WARN-level log ("treating as graceful
    /// shutdown per Task 9 §B.4") before cancelling.  Only the cancellation
    /// is verified here; capturing the log output would require a custom
    /// tracing subscriber, which adds fragility.
    #[cfg(unix)]
    #[tokio::test]
    async fn sighup_triggers_cancellation_token_with_warn_log() {
        assert_signal_cancels(libc::SIGHUP, "SIGHUP").await;
    }

    /// `install_signal_handlers` must be callable repeatedly: each call
    /// registers a fresh set of listeners on the same signals.  This also
    /// verifies that dropping one guard does not prevent a subsequent
    /// installation from working.
    #[cfg(unix)]
    #[tokio::test]
    async fn install_is_idempotent_across_independent_invocations() {
        // First guard is installed and dropped at the end of this scope,
        // which aborts its listener tasks.
        {
            let shutdown = CancellationToken::new();
            let _guard =
                install_signal_handlers(shutdown.clone()).expect("first install must succeed");
        }

        // A second installation on a fresh token must still succeed.
        let shutdown2 = CancellationToken::new();
        let _guard2 = install_signal_handlers(shutdown2.clone())
            .expect("second install after drop must succeed");

        tokio::task::yield_now().await;

        send_signal_to_self(libc::SIGTERM, "SIGTERM");

        assert!(
            wait_cancelled(&shutdown2).await,
            "second shutdown token must be cancelled after SIGTERM"
        );
    }

    /// Dropping `SignalGuard` without any signal having been sent must not
    /// cancel the token.
    ///
    /// NOTE: We cannot easily verify that a subsequent SIGTERM would still be
    /// intercepted by Tokio rather than killing the process, so this test
    /// only asserts that the token is NOT cancelled right after the guard is
    /// dropped.
    #[cfg(unix)]
    #[tokio::test]
    async fn dropping_guard_before_signal_does_not_cancel_token() {
        let shutdown = CancellationToken::new();

        // Install and immediately drop the guard.
        drop(install_signal_handlers(shutdown.clone()).expect("install must succeed"));

        // The token must NOT have been cancelled.
        assert!(
            !shutdown.is_cancelled(),
            "token must not be cancelled when no signal has been sent"
        );
    }

    // ── Registration-failure simulation ──────────────────────────────────

    /// A real registration failure (e.g. `ENOSYS` in a very restricted
    /// container) cannot easily be provoked from a test, so this only checks
    /// that the happy-path installation returns `Ok`.  The
    /// `DaemonError::SignalSetup` variant itself is expected to be covered by
    /// the tests in `error.rs`, not here.
    #[cfg(unix)]
    #[tokio::test]
    async fn install_signal_handlers_returns_ok_on_happy_path() {
        let shutdown = CancellationToken::new();
        let result = install_signal_handlers(shutdown);
        assert!(
            result.is_ok(),
            "install_signal_handlers must return Ok on a normal Unix host: {result:?}"
        );
    }

    // ── Windows / non-Unix Ctrl-C test ────────────────────────────────────

    /// On Windows (and other non-Unix platforms), install must succeed.
    /// A synthetic Ctrl-C cannot be triggered without platform-specific APIs,
    /// so this test only validates successful installation.
    #[cfg(not(unix))]
    #[tokio::test]
    async fn install_signal_handlers_returns_ok_on_non_unix() {
        let shutdown = CancellationToken::new();
        let result = install_signal_handlers(shutdown);
        assert!(
            result.is_ok(),
            "install_signal_handlers must return Ok on a non-Unix host: {result:?}"
        );
    }
}