sqry_daemon/lifecycle/signals.rs
1//! Signal handler installation for the sqryd daemon.
2//!
3//! # Signal model and async-signal safety
4//!
//! All signal handling here is built on [`tokio::signal`]. Tokio registers an
//! **async-signal-safe** forwarder at the OS level: on delivery the handler
//! only records the event and writes a single byte to an internal self-pipe
//! (`write(2)` is async-signal-safe per POSIX). The resulting `Signal`
//! listener is awaited (via `recv()`) exclusively from normal Tokio task
//! context — ordinary async Rust, not a raw `libc` signal handler. There is
//! no `SA_RESETHAND`, no global mutable signal state touched by this module,
//! and no calls to non-async-signal-safe functions from within the
//! signal-delivery path.
13//!
14//! # Signals handled
15//!
16//! | Signal | Platform | Action |
17//! |--------|----------|--------|
18//! | `SIGTERM` | Unix | Logs at `INFO`, cancels the shutdown token. |
19//! | `SIGINT` | Unix | Logs at `INFO`, cancels the shutdown token. |
20//! | `SIGHUP` | Unix | Logs at `WARN` ("treating as graceful shutdown per Task 9 §B.4"), cancels the shutdown token. |
//! | Ctrl-C | Windows (any non-Unix target) | Logs at `INFO`, cancels the shutdown token. |
22//!
23//! Hot-reload on `SIGHUP` is explicitly **out of scope** for Task 9 (§B.4 of
24//! the design). `SIGHUP` triggers a graceful shutdown identical to `SIGTERM`.
25//!
26//! # Usage
27//!
28//! ```ignore
29//! use tokio_util::sync::CancellationToken;
30//! use sqry_daemon::lifecycle::signals::install_signal_handlers;
31//!
32//! let shutdown = CancellationToken::new();
33//! let _guard = install_signal_handlers(shutdown.clone())?;
34//! // …
35//! shutdown.cancelled().await; // wakes on SIGTERM / SIGINT / SIGHUP / Ctrl-C
36//! ```
37//!
38//! The returned [`SignalGuard`] **must be kept alive** for the duration of the
39//! server's run loop. Dropping it aborts the signal-listener tasks so that
40//! further deliveries of `SIGTERM` / `SIGINT` / `SIGHUP` are no longer
41//! forwarded to the shutdown token. Note: Tokio installs a process-wide OS
42//! signal handler for each registered signal kind and that handler is **not**
43//! removed when individual listeners are dropped; the OS does not revert to
44//! `SIG_DFL` after the guard is dropped.
45//!
46//! # Design reference
47//!
48//! `docs/reviews/sqryd-daemon/2026-04-19/task-9-design_iter3_request.md`
49//! §M (signal handling, n7 fix), §B.4 (SIGHUP = graceful shutdown).
50
51use tokio_util::sync::CancellationToken;
52
53#[cfg(unix)]
54use crate::error::DaemonError;
55use crate::error::DaemonResult;
56
57/// RAII guard that keeps the signal-listener tasks alive.
58///
59/// Created by [`install_signal_handlers`]. Dropping this guard aborts every
60/// signal-listener task spawned during installation, stopping them from
61/// forwarding further signal deliveries to the shutdown token.
62///
63/// **Important:** Tokio installs a process-wide OS signal handler for each
64/// registered signal kind and that handler is **not** removed when the
65/// listener is dropped. The OS does **not** revert to `SIG_DFL` after the
66/// guard is dropped. What changes is that subsequent deliveries are no
67/// longer forwarded to *this* guard's shutdown token; any other active
68/// `tokio::signal` listener in the process could still observe them. Keep
69/// the guard alive for the entire lifetime of the server's run loop to ensure
70/// graceful-shutdown signals are processed by sqryd.
71#[derive(Debug)]
72#[must_use = "SignalGuard must be kept alive for signal handling to remain active"]
73pub struct SignalGuard {
74 /// Abort handles for each spawned signal-listener task.
75 ///
76 /// Stored as `JoinHandle` so that `abort()` is called on each during
77 /// `Drop`. We do not `.await` the handles — the abort is fire-and-forget.
78 handles: Vec<tokio::task::JoinHandle<()>>,
79}
80
81impl Drop for SignalGuard {
82 fn drop(&mut self) {
83 for handle in self.handles.drain(..) {
84 handle.abort();
85 }
86 }
87}
88
89/// Install OS signal handlers and wire them to `shutdown`.
90///
91/// On **Unix** this registers listeners for `SIGTERM`, `SIGINT`, and `SIGHUP`.
92/// On **Windows** this registers a `Ctrl-C` listener via
93/// [`tokio::signal::ctrl_c`].
94///
95/// Each signal listener runs in its own Tokio task. When a signal is
96/// delivered, the task logs the event and calls
97/// [`CancellationToken::cancel`] on `shutdown`. Because
98/// [`CancellationToken::cancel`] is idempotent, receiving multiple signals
99/// (e.g. two rapid `SIGTERM`s) is safe and produces only a single
100/// cancellation.
101///
102/// # Errors
103///
104/// Returns [`DaemonError::SignalSetup`] if any signal stream cannot be
105/// registered. This can happen in highly restricted containers (e.g. when
106/// `sigaction(2)` returns `ENOSYS`) or when tokio's signal back-end fails to
107/// initialise its self-pipe.
108pub fn install_signal_handlers(shutdown: CancellationToken) -> DaemonResult<SignalGuard> {
109 let handles = register_handlers(shutdown)?;
110 Ok(SignalGuard { handles })
111}
112
113// ── Platform-specific registration ───────────────────────────────────────────
114
115/// Unix: register SIGTERM, SIGINT, SIGHUP listeners.
116#[cfg(unix)]
117fn register_handlers(
118 shutdown: CancellationToken,
119) -> DaemonResult<Vec<tokio::task::JoinHandle<()>>> {
120 use tokio::signal::unix::{SignalKind, signal};
121
122 let mut sigterm =
123 signal(SignalKind::terminate()).map_err(|e| DaemonError::SignalSetup { source: e })?;
124 let mut sigint =
125 signal(SignalKind::interrupt()).map_err(|e| DaemonError::SignalSetup { source: e })?;
126 let mut sighup =
127 signal(SignalKind::hangup()).map_err(|e| DaemonError::SignalSetup { source: e })?;
128
129 let shutdown_term = shutdown.clone();
130 let h_term = tokio::spawn(async move {
131 loop {
132 // Signal::recv() returns None if the signal driver closes (rare,
133 // but possible in certain runtime teardown scenarios). Break to
134 // avoid a busy-spin; the shutdown token will be cancelled on next
135 // delivery in the normal path.
136 if sigterm.recv().await.is_none() {
137 tracing::warn!("SIGTERM signal driver closed — listener exiting");
138 break;
139 }
140 tracing::info!("received SIGTERM — initiating graceful shutdown");
141 shutdown_term.cancel();
142 }
143 });
144
145 let shutdown_int = shutdown.clone();
146 let h_int = tokio::spawn(async move {
147 loop {
148 if sigint.recv().await.is_none() {
149 tracing::warn!("SIGINT signal driver closed — listener exiting");
150 break;
151 }
152 tracing::info!("received SIGINT — initiating graceful shutdown");
153 shutdown_int.cancel();
154 }
155 });
156
157 let h_hup = tokio::spawn(async move {
158 loop {
159 if sighup.recv().await.is_none() {
160 tracing::warn!("SIGHUP signal driver closed — listener exiting");
161 break;
162 }
163 tracing::warn!(
164 "received SIGHUP — treating as graceful shutdown per Task 9 §B.4 \
165 (hot-reload is out of scope)"
166 );
167 shutdown.cancel();
168 }
169 });
170
171 Ok(vec![h_term, h_int, h_hup])
172}
173
/// Non-Unix (Windows): spawn a single task that listens for Ctrl-C.
///
/// Unlike the Unix variant, the listener is created inside the spawned task
/// (each loop iteration awaits a fresh [`tokio::signal::ctrl_c`] future), so
/// this function itself always returns `Ok`. If waiting for Ctrl-C fails, the
/// task logs the error at `WARN` and exits; the token is not cancelled in
/// that case.
#[cfg(not(unix))]
fn register_handlers(
    shutdown: CancellationToken,
) -> DaemonResult<Vec<tokio::task::JoinHandle<()>>> {
    let ctrl_c_task = tokio::spawn(async move {
        loop {
            match tokio::signal::ctrl_c().await {
                Ok(()) => {
                    // Keep looping afterwards so repeated Ctrl-C presses are
                    // still consumed; `cancel()` is idempotent.
                    tracing::info!("received Ctrl-C — initiating graceful shutdown");
                    shutdown.cancel();
                }
                Err(e) => {
                    tracing::warn!("ctrl-c listener error: {e} — stopping Ctrl-C handler");
                    break;
                }
            }
        }
    });

    Ok(vec![ctrl_c_task])
}
192
193// ── Tests ─────────────────────────────────────────────────────────────────────
194
#[cfg(test)]
mod tests {
    // `Duration` is only needed by the Unix-only `wait_cancelled` helper;
    // gating the import keeps non-Unix test builds free of unused warnings.
    #[cfg(unix)]
    use std::time::Duration;

    use tokio_util::sync::CancellationToken;

    use super::install_signal_handlers;

    // ── helpers ──────────────────────────────────────────────────────────

    /// Wait for `token` to be cancelled, with a generous 5-second timeout so
    /// that slow CI machines do not produce spurious failures.
    ///
    /// Returns `true` if the token was cancelled before the timeout and
    /// `false` otherwise. Only the Unix tests deliver real signals, so the
    /// helper is compiled on Unix only.
    #[cfg(unix)]
    async fn wait_cancelled(token: &CancellationToken) -> bool {
        tokio::time::timeout(Duration::from_secs(5), token.cancelled())
            .await
            .is_ok()
    }

    /// Send `signum` to the current process and return `kill(2)`'s result
    /// (`0` on success).
    ///
    /// Callers must have installed the signal handlers first: once Tokio's
    /// forwarder is registered, the signal is routed to the listener task
    /// instead of terminating the test process.
    #[cfg(unix)]
    fn send_signal_to_self(signum: libc::c_int) -> libc::c_int {
        // SAFETY: `getpid()` takes no arguments and returns the calling
        // process's PID; `kill()` takes only plain integers (no pointers),
        // so neither call can violate memory safety. The signal targets this
        // process only.
        unsafe { libc::kill(libc::getpid(), signum) }
    }

    // ── Unix signal tests ─────────────────────────────────────────────────

    /// SIGTERM must cancel the shutdown token.
    ///
    /// Sends `SIGTERM` to the current process via `kill(getpid(), SIGTERM)`.
    /// Tokio's signal back-end delivers the signal to the listener task
    /// through its internal self-pipe; the task then calls
    /// `shutdown.cancel()`.
    #[cfg(unix)]
    #[tokio::test]
    async fn sigterm_triggers_cancellation_token() {
        let shutdown = CancellationToken::new();
        let _guard = install_signal_handlers(shutdown.clone())
            .expect("install_signal_handlers must succeed");

        // Give the listener tasks a moment to start before sending the signal.
        tokio::task::yield_now().await;

        let rc = send_signal_to_self(libc::SIGTERM);
        assert_eq!(rc, 0, "kill(getpid(), SIGTERM) must succeed");

        assert!(
            wait_cancelled(&shutdown).await,
            "shutdown token must be cancelled after SIGTERM"
        );
    }

    /// SIGINT must cancel the shutdown token.
    #[cfg(unix)]
    #[tokio::test]
    async fn sigint_triggers_cancellation_token() {
        let shutdown = CancellationToken::new();
        let _guard = install_signal_handlers(shutdown.clone())
            .expect("install_signal_handlers must succeed");

        tokio::task::yield_now().await;

        let rc = send_signal_to_self(libc::SIGINT);
        assert_eq!(rc, 0, "kill(getpid(), SIGINT) must succeed");

        assert!(
            wait_cancelled(&shutdown).await,
            "shutdown token must be cancelled after SIGINT"
        );
    }

    /// SIGHUP must cancel the shutdown token.
    ///
    /// The SIGHUP listener emits a WARN-level log ("treating as graceful
    /// shutdown per Task 9 §B.4") before cancelling. This test verifies only
    /// the cancellation behaviour; the log output is not captured here
    /// because that would require a custom tracing subscriber, which adds
    /// fragility.
    #[cfg(unix)]
    #[tokio::test]
    async fn sighup_triggers_cancellation_token_with_warn_log() {
        let shutdown = CancellationToken::new();
        let _guard = install_signal_handlers(shutdown.clone())
            .expect("install_signal_handlers must succeed");

        tokio::task::yield_now().await;

        let rc = send_signal_to_self(libc::SIGHUP);
        assert_eq!(rc, 0, "kill(getpid(), SIGHUP) must succeed");

        assert!(
            wait_cancelled(&shutdown).await,
            "shutdown token must be cancelled after SIGHUP"
        );
    }

    /// `install_signal_handlers` must be callable repeatedly (each call
    /// registers a fresh set of listeners on the same signals, which tokio
    /// supports). This also verifies that dropping a guard stops the previous
    /// listeners without affecting a subsequent `install_signal_handlers`
    /// call.
    #[cfg(unix)]
    #[tokio::test]
    async fn install_is_idempotent_across_independent_invocations() {
        // First guard installed and immediately dropped — its listener tasks
        // are aborted when the scope ends.
        {
            let shutdown = CancellationToken::new();
            let _guard =
                install_signal_handlers(shutdown.clone()).expect("first install must succeed");
        }

        // Second installation on a fresh token must still succeed.
        let shutdown2 = CancellationToken::new();
        let _guard2 = install_signal_handlers(shutdown2.clone())
            .expect("second install after drop must succeed");

        tokio::task::yield_now().await;

        let rc = send_signal_to_self(libc::SIGTERM);
        assert_eq!(rc, 0, "kill(getpid(), SIGTERM) must succeed");

        assert!(
            wait_cancelled(&shutdown2).await,
            "second shutdown token must be cancelled after SIGTERM"
        );
    }

    /// Dropping `SignalGuard` before a signal is received must not cancel the
    /// token. The listener tasks have been aborted; Tokio's process-wide OS
    /// handler still intercepts the signal (the OS does NOT restore `SIG_DFL`
    /// on listener drop per Tokio's documented behavior), but no application
    /// code observes it.
    ///
    /// NOTE: We cannot easily verify that a subsequent SIGTERM is still
    /// intercepted by Tokio rather than killing the process, so no signal is
    /// sent here; we only assert that the token is NOT cancelled right after
    /// the guard is dropped.
    #[cfg(unix)]
    #[tokio::test]
    async fn dropping_guard_before_signal_does_not_cancel_token() {
        let shutdown = CancellationToken::new();

        // Install and immediately drop the guard.
        drop(install_signal_handlers(shutdown.clone()).expect("install must succeed"));

        // The token must NOT have been cancelled.
        assert!(
            !shutdown.is_cancelled(),
            "token must not be cancelled when no signal has been sent"
        );
    }

    // ── Registration-failure simulation ──────────────────────────────────

    /// On Unix we cannot easily trigger a real `ENOSYS` registration failure
    /// from a test (it only occurs in very restricted container
    /// environments), so we instead verify that the _happy-path_ installation
    /// returns `Ok`. The `DaemonError::SignalSetup` variant is exercised by
    /// the unit tests in `error.rs` (exit code + JSON-RPC code checks), not
    /// here.
    #[cfg(unix)]
    #[tokio::test]
    async fn install_signal_handlers_returns_ok_on_happy_path() {
        let shutdown = CancellationToken::new();
        let result = install_signal_handlers(shutdown);
        assert!(
            result.is_ok(),
            "install_signal_handlers must return Ok on a normal Unix host: {result:?}"
        );
    }

    // ── Windows / non-Unix Ctrl-C test ────────────────────────────────────

    /// On Windows (and other non-Unix platforms), install must succeed.
    /// We cannot trigger a synthetic Ctrl-C in tests without
    /// platform-specific APIs, so this test only validates successful
    /// installation.
    #[cfg(not(unix))]
    #[tokio::test]
    async fn install_signal_handlers_returns_ok_on_non_unix() {
        let shutdown = CancellationToken::new();
        let result = install_signal_handlers(shutdown);
        assert!(
            result.is_ok(),
            "install_signal_handlers must return Ok on a non-Unix host: {result:?}"
        );
    }
}