Skip to main content

tokio_process_tools/process_handle/termination/
mod.rs

1use super::ProcessHandle;
2use crate::error::{
3    TerminationAttemptError, TerminationAttemptOperation, TerminationAttemptPhase, TerminationError,
4};
5use crate::output_stream::OutputStream;
6use std::borrow::Cow;
7use std::error::Error;
8use std::io;
9#[cfg(any(unix, windows))]
10use std::marker::PhantomData;
11use std::process::ExitStatus;
12use std::time::Duration;
13
/// Maximum time to wait for process termination after forceful kill.
///
/// This is a safety timeout since forceful kill should terminate processes immediately,
/// but there are rare cases where even forceful kill may not work.
///
/// The same value is reported as the output-collection timeout extension when a forceful kill
/// succeeds.
#[cfg(any(unix, windows))]
const FORCE_KILL_WAIT_TIMEOUT: Duration = Duration::from_secs(3);

/// Grace window granted to Tokio's SIGCHLD reaper after a signal-send failure so a freshly-exited
/// child is observed as exited rather than as still running. Covers the brief race where the OS
/// rejects signals to a not-yet-reaped process group (`EPERM` on macOS, `ESRCH` on Linux).
#[cfg(any(unix, windows))]
const REAP_AFTER_SIGNAL_FAILURE_GRACE: Duration = Duration::from_millis(100);

/// Signal name attached to diagnostics and log events for the Unix interrupt phase.
#[cfg(unix)]
const INTERRUPT_LABEL: &str = "SIGINT";
/// Signal name attached to diagnostics for the Unix terminate phase.
#[cfg(unix)]
const TERMINATE_LABEL: &str = "SIGTERM";

/// Signal name attached to diagnostics for the single Windows graceful phase.
#[cfg(windows)]
const GRACEFUL_LABEL: &str = "CTRL_BREAK_EVENT";

/// Label recorded in diagnostics for the forceful kill phase. Cross-platform because `kill()` is
/// available on every platform Tokio supports (the underlying `Child::start_kill()` is what runs
/// on targets where graceful escalation is unavailable).
#[cfg(unix)]
const KILL_LABEL: &str = "SIGKILL";
/// Windows spelling of the forceful-kill label.
#[cfg(windows)]
const KILL_LABEL: &str = "TerminateProcess";
/// Generic forceful-kill label for targets that are neither Unix nor Windows.
#[cfg(not(any(unix, windows)))]
const KILL_LABEL: &str = "kill";
44
45/// Per-platform graceful-shutdown timeout budget passed to [`ProcessHandle::terminate`] and
46/// related APIs.
47///
48/// The shape mirrors the platform's actual graceful-shutdown model. On Unix the type carries
/// two separate timeouts, one per graceful phase (`SIGINT` then `SIGTERM`). On Windows it carries
/// a single timeout for the single graceful phase (`CTRL_BREAK_EVENT`) on that platform.
51///
52/// # Cross-platform construction
53///
54/// Use [`GracefulTimeouts::builder`] to write a single cross-platform construction expression.
55/// The setter for the platform that does not match the current target accepts its arguments
56/// without using them, so no cfg gates are needed at the call site:
57///
58/// ```rust
59/// use std::time::Duration;
60/// use tokio_process_tools::{GracefulTimeouts, both};
61///
62/// let timeouts = GracefulTimeouts::builder()
63///     .unix((Duration::from_secs(3), Duration::from_secs(5)))
64///     .windows(Duration::from_secs(8))
65///     .build();
66///
67/// // For the common case where both Unix phases share a value:
68/// let timeouts = GracefulTimeouts::builder()
69///     .unix(both(Duration::from_secs(3)))
70///     .windows(Duration::from_secs(8))
71///     .build();
72/// ```
73///
74/// # Platform-specific construction
75///
76/// Code that intentionally tunes Unix and Windows independently can also construct the value
77/// directly with cfg gates:
78///
79/// ```rust
80/// use std::time::Duration;
81/// use tokio_process_tools::GracefulTimeouts;
82///
83/// #[cfg(unix)]
84/// let timeouts = GracefulTimeouts {
85///     interrupt_timeout: Duration::from_secs(3),
86///     terminate_timeout: Duration::from_secs(5),
87/// };
88/// #[cfg(windows)]
89/// let timeouts = GracefulTimeouts {
90///     graceful_timeout: Duration::from_secs(8),
91/// };
92/// ```
93///
94/// # Platform availability
95///
96/// This type is only available on Unix and Windows because the underlying graceful-shutdown
97/// signals only exist there. On other Tokio-supported targets the spawn, wait,
98/// output-collection, and [`ProcessHandle::kill`] APIs remain available; only the
99/// graceful-termination surface (`terminate(...)`, `terminate_on_drop(...)`,
100/// `wait_for_completion_or_terminate(...)`, the `send_*_signal(...)` methods, and this type) is
101/// gated out.
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GracefulTimeouts {
    /// Maximum time to wait after sending `SIGINT` before escalating to `SIGTERM`.
    ///
    /// Only present on Unix.
    #[cfg(unix)]
    pub interrupt_timeout: Duration,
    /// Maximum time to wait after sending `SIGTERM` before escalating to `SIGKILL`.
    ///
    /// Only present on Unix.
    #[cfg(unix)]
    pub terminate_timeout: Duration,
    /// Maximum time to wait after sending `CTRL_BREAK_EVENT` before escalating to
    /// `TerminateProcess`.
    ///
    /// Only present on Windows.
    #[cfg(windows)]
    pub graceful_timeout: Duration,
}
116
117#[cfg(any(unix, windows))]
118impl GracefulTimeouts {
119    /// Start a fluent specification of a `GracefulTimeouts` value.
120    ///
121    /// Call [`unix`](GracefulTimeoutsBuilder::unix), then
122    /// [`windows`](GracefulTimeoutsBuilder::windows), then
123    /// [`build`](GracefulTimeoutsBuilder::build). The setter for the platform that does not
124    /// match the current target accepts its arguments without using them, which lets
125    /// cross-platform code construct the value without cfg gates.
126    ///
127    /// See the [type-level documentation](GracefulTimeouts#cross-platform-construction) for an
128    /// example.
129    #[must_use]
130    pub fn builder() -> GracefulTimeoutsBuilder<UnixUnset> {
131        GracefulTimeoutsBuilder {
132            #[cfg(unix)]
133            interrupt_timeout: Duration::ZERO,
134            #[cfg(unix)]
135            terminate_timeout: Duration::ZERO,
136            #[cfg(windows)]
137            graceful_timeout: Duration::ZERO,
138            _state: PhantomData,
139        }
140    }
141
142    /// Combined graceful-shutdown budget, used for downstream output-collection deadlines.
143    pub(crate) fn total(self) -> Duration {
144        #[cfg(unix)]
145        {
146            self.interrupt_timeout
147                .saturating_add(self.terminate_timeout)
148        }
149        #[cfg(windows)]
150        {
151            self.graceful_timeout
152        }
153    }
154}
155
/// Typestate marker indicating that the Unix-side budgets have not been provided yet.
///
/// This is the state of the builder returned by [`GracefulTimeouts::builder`].
#[cfg(any(unix, windows))]
#[doc(hidden)]
#[derive(Debug, Clone, Copy)]
pub struct UnixUnset;
161
/// Typestate marker indicating that the Unix-side budgets have been provided but the
/// Windows-side budget has not.
///
/// This is the state of the builder returned by [`GracefulTimeoutsBuilder::unix`].
#[cfg(any(unix, windows))]
#[doc(hidden)]
#[derive(Debug, Clone, Copy)]
pub struct UnixSet;
168
/// Typestate marker indicating that both the Unix-side and Windows-side budgets have been
/// provided. A builder in this state can be finished with
/// [`build`](GracefulTimeoutsBuilder::build).
///
/// This is the state of the builder returned by [`GracefulTimeoutsBuilder::windows`].
#[cfg(any(unix, windows))]
#[doc(hidden)]
#[derive(Debug, Clone, Copy)]
pub struct BothSet;
176
/// Typestate builder for [`GracefulTimeouts`]. Created via [`GracefulTimeouts::builder`].
///
/// Both [`unix`](Self::unix) and [`windows`](Self::windows) must be called (in that order)
/// before [`build`](Self::build) becomes available. The setter for the platform that does not
/// match the current target accepts its arguments without using them, so cross-platform code
/// can build a value without cfg gates.
///
/// Only the fields relevant to the compilation target exist; they mirror the fields of
/// [`GracefulTimeouts`].
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy)]
pub struct GracefulTimeoutsBuilder<State> {
    // Pending `SIGINT` phase budget (Unix only).
    #[cfg(unix)]
    interrupt_timeout: Duration,
    // Pending `SIGTERM` phase budget (Unix only).
    #[cfg(unix)]
    terminate_timeout: Duration,
    // Pending `CTRL_BREAK_EVENT` phase budget (Windows only).
    #[cfg(windows)]
    graceful_timeout: Duration,
    // Carries the typestate marker in the type without storing a `State` value.
    _state: PhantomData<fn() -> State>,
}
194
195#[cfg(any(unix, windows))]
196impl GracefulTimeoutsBuilder<UnixUnset> {
197    /// Set the Unix-side budgets as a `(interrupt_timeout, terminate_timeout)` tuple.
198    ///
199    /// Use [`both`] for the common case where both phases share the same value.
200    ///
201    /// On non-Unix targets the tuple is accepted but unused.
202    #[must_use]
203    pub fn unix(self, timeouts: (Duration, Duration)) -> GracefulTimeoutsBuilder<UnixSet> {
204        #[cfg(not(unix))]
205        let _ = timeouts;
206        GracefulTimeoutsBuilder {
207            #[cfg(unix)]
208            interrupt_timeout: timeouts.0,
209            #[cfg(unix)]
210            terminate_timeout: timeouts.1,
211            #[cfg(windows)]
212            graceful_timeout: self.graceful_timeout,
213            _state: PhantomData,
214        }
215    }
216}
217
218#[cfg(any(unix, windows))]
219impl GracefulTimeoutsBuilder<UnixSet> {
220    /// Set the Windows-side graceful budget.
221    ///
222    /// On non-Windows targets the value is accepted but unused.
223    #[must_use]
224    pub fn windows(self, graceful_timeout: Duration) -> GracefulTimeoutsBuilder<BothSet> {
225        #[cfg(not(windows))]
226        let _ = graceful_timeout;
227        GracefulTimeoutsBuilder {
228            #[cfg(unix)]
229            interrupt_timeout: self.interrupt_timeout,
230            #[cfg(unix)]
231            terminate_timeout: self.terminate_timeout,
232            #[cfg(windows)]
233            graceful_timeout,
234            _state: PhantomData,
235        }
236    }
237}
238
239#[cfg(any(unix, windows))]
240impl GracefulTimeoutsBuilder<BothSet> {
241    /// Finish the builder, producing a [`GracefulTimeouts`] populated from the
242    /// platform-relevant inputs.
243    #[must_use]
244    pub fn build(self) -> GracefulTimeouts {
245        GracefulTimeouts {
246            #[cfg(unix)]
247            interrupt_timeout: self.interrupt_timeout,
248            #[cfg(unix)]
249            terminate_timeout: self.terminate_timeout,
250            #[cfg(windows)]
251            graceful_timeout: self.graceful_timeout,
252        }
253    }
254}
255
/// Duplicates `d` into a `(d, d)` pair.
///
/// Intended as the argument to [`GracefulTimeoutsBuilder::unix`] when the interrupt and
/// terminate phases should use the same timeout.
///
/// ```rust
/// use std::time::Duration;
/// use tokio_process_tools::both;
///
/// let three = Duration::from_secs(3);
/// assert_eq!(both(three), (three, three));
/// ```
#[cfg(any(unix, windows))]
#[must_use]
pub const fn both(d: Duration) -> (Duration, Duration) {
    (d, d)
}
263
/// Result of a termination that ended with the child exited.
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct TerminationOutcome {
    /// Exit status obtained for the child.
    pub(crate) exit_status: ExitStatus,
    /// Extra output-collection time tied to how the process ended: zero for a graceful exit,
    /// `FORCE_KILL_WAIT_TIMEOUT` when the forceful kill was needed.
    pub(crate) output_collection_timeout_extension: Duration,
}
270
271#[cfg(any(unix, windows))]
272impl TerminationOutcome {
273    fn graceful_success(exit_status: ExitStatus) -> Self {
274        Self {
275            exit_status,
276            output_collection_timeout_extension: Duration::ZERO,
277        }
278    }
279
280    fn force_kill_success(exit_status: ExitStatus) -> Self {
281        Self {
282            exit_status,
283            output_collection_timeout_extension: FORCE_KILL_WAIT_TIMEOUT,
284        }
285    }
286}
287
/// Graceful phase that a recorded signal, wait, or status error is attributed to.
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy)]
enum GracefulTerminationPhase {
    /// The `SIGINT` phase; exists only on Unix.
    #[cfg(unix)]
    Interrupt,
    /// The `SIGTERM` phase on Unix. The Windows `CTRL_BREAK_EVENT` send is attributed to this
    /// variant as well.
    Terminate,
}
295
296#[cfg(any(unix, windows))]
297impl GracefulTerminationPhase {
298    fn attempt_phase(self) -> TerminationAttemptPhase {
299        match self {
300            #[cfg(unix)]
301            Self::Interrupt => TerminationAttemptPhase::Interrupt,
302            Self::Terminate => TerminationAttemptPhase::Terminate,
303        }
304    }
305}
306
/// Collects the errors hit while attempting to terminate a process so they can be returned
/// together inside a [`TerminationError`].
#[derive(Debug, Default)]
struct TerminationDiagnostics {
    /// Recorded attempt errors, in the order they were pushed.
    attempt_errors: Vec<TerminationAttemptError>,
}
311
312impl TerminationDiagnostics {
313    #[cfg(any(unix, windows))]
314    fn record_preflight_status_error(&mut self, error: impl Error + Send + Sync + 'static) {
315        self.record(
316            TerminationAttemptPhase::Preflight,
317            TerminationAttemptOperation::CheckStatus,
318            None,
319            error,
320        );
321    }
322
323    #[cfg(any(unix, windows))]
324    fn record_graceful_signal_error(
325        &mut self,
326        phase: GracefulTerminationPhase,
327        signal_name: &'static str,
328        error: impl Error + Send + Sync + 'static,
329    ) {
330        self.record(
331            phase.attempt_phase(),
332            TerminationAttemptOperation::SendSignal,
333            Some(signal_name),
334            error,
335        );
336    }
337
338    #[cfg(any(unix, windows))]
339    fn record_graceful_wait_error(
340        &mut self,
341        phase: GracefulTerminationPhase,
342        signal_name: &'static str,
343        error: impl Error + Send + Sync + 'static,
344    ) {
345        self.record(
346            phase.attempt_phase(),
347            TerminationAttemptOperation::WaitForExit,
348            Some(signal_name),
349            error,
350        );
351    }
352
353    #[cfg(any(unix, windows))]
354    fn record_graceful_status_error(
355        &mut self,
356        phase: GracefulTerminationPhase,
357        signal_name: &'static str,
358        error: impl Error + Send + Sync + 'static,
359    ) {
360        self.record(
361            phase.attempt_phase(),
362            TerminationAttemptOperation::CheckStatus,
363            Some(signal_name),
364            error,
365        );
366    }
367
368    fn record_kill_signal_error(&mut self, error: impl Error + Send + Sync + 'static) {
369        self.record(
370            TerminationAttemptPhase::Kill,
371            TerminationAttemptOperation::SendSignal,
372            Some(KILL_LABEL),
373            error,
374        );
375    }
376
377    fn record_kill_wait_error(&mut self, error: impl Error + Send + Sync + 'static) {
378        self.record(
379            TerminationAttemptPhase::Kill,
380            TerminationAttemptOperation::WaitForExit,
381            Some(KILL_LABEL),
382            error,
383        );
384    }
385
386    #[cfg(any(unix, windows))]
387    fn record_kill_status_error(&mut self, error: impl Error + Send + Sync + 'static) {
388        self.record(
389            TerminationAttemptPhase::Kill,
390            TerminationAttemptOperation::CheckStatus,
391            Some(KILL_LABEL),
392            error,
393        );
394    }
395
396    fn record(
397        &mut self,
398        phase: TerminationAttemptPhase,
399        operation: TerminationAttemptOperation,
400        signal_name: Option<&'static str>,
401        error: impl Error + Send + Sync + 'static,
402    ) {
403        self.attempt_errors.push(TerminationAttemptError {
404            phase,
405            operation,
406            signal_name,
407            source: Box::new(error),
408        });
409    }
410
411    #[must_use]
412    fn into_termination_failed(self, process_name: Cow<'static, str>) -> TerminationError {
413        assert!(
414            !self.attempt_errors.is_empty(),
415            "into_termination_failed must not be used when no error was recorded!",
416        );
417
418        TerminationError::TerminationFailed {
419            process_name,
420            attempt_errors: self.attempt_errors,
421        }
422    }
423
424    #[cfg(any(unix, windows))]
425    #[must_use]
426    fn into_signal_failed(self, process_name: Cow<'static, str>) -> TerminationError {
427        assert!(
428            !self.attempt_errors.is_empty(),
429            "into_signal_failed must not be used when no error was recorded!",
430        );
431
432        TerminationError::SignalFailed {
433            process_name,
434            attempt_errors: self.attempt_errors,
435        }
436    }
437}
438
439// Cross-platform termination methods. `kill()` and the Drop best-effort cleanup work on every
440// Tokio-supported platform via `tokio::process::Child::start_kill()`, so they stay available
441// even on targets where graceful-termination escalation is not.
442impl<Stdout, Stderr> ProcessHandle<Stdout, Stderr>
443where
444    Stdout: OutputStream,
445    Stderr: OutputStream,
446{
447    /// Forces the process to exit. Most users should call [`ProcessHandle::terminate`] instead.
448    ///
449    /// This is equivalent to sending `SIGKILL` on Unix or calling `TerminateProcess` on Windows,
450    /// followed by wait. On other Tokio-supported platforms it forwards to
451    /// [`tokio::process::Child::start_kill`].
452    /// Any still-open stdin handle is closed before Tokio performs that kill-and-wait sequence,
453    /// matching [`tokio::process::Child::kill`] semantics.
454    /// A successful call waits for the child to exit and disarms the drop cleanup and panic guards,
455    /// so the handle can be dropped safely afterward.
456    ///
457    /// `kill` is a reasonable next step when [`terminate`](Self::terminate) returns `Err` and the
458    /// caller is not interested in further graceful escalation.
459    ///
460    /// # Errors
461    ///
462    /// Returns [`TerminationError`] if Tokio cannot kill or wait for the child process.
463    pub async fn kill(&mut self) -> Result<(), TerminationError> {
464        self.kill_inner(Self::start_kill_raw).await
465    }
466
467    pub(super) async fn kill_inner<StartKill>(
468        &mut self,
469        mut start_kill: StartKill,
470    ) -> Result<(), TerminationError>
471    where
472        StartKill: FnMut(&mut Self) -> Result<(), io::Error>,
473    {
474        self.stdin().close();
475        let mut diagnostics = TerminationDiagnostics::default();
476
477        if let Err(err) = start_kill(self) {
478            diagnostics.record_kill_signal_error(err);
479            return Err(diagnostics.into_termination_failed(self.name.clone()));
480        }
481
482        if let Err(err) = self.wait_for_completion_unbounded_inner().await {
483            diagnostics.record_kill_wait_error(err);
484            return Err(diagnostics.into_termination_failed(self.name.clone()));
485        }
486
487        Ok(())
488    }
489
490    pub(super) fn start_kill_raw(&mut self) -> Result<(), io::Error> {
491        Self::start_kill_process_group(&mut self.child)
492    }
493
494    /// Sends `SIGKILL` to the child's process group on Unix and forwards to Tokio's
495    /// `Child::start_kill` everywhere else.
496    ///
497    /// On Unix the child is the leader of a process group set up at spawn time, so targeting the
498    /// group reaches any grandchildren the child has fork-execed. Tokio's stock `start_kill`
499    /// targets only the child's PID and would orphan that subtree. On Windows the standard
500    /// `TerminateProcess` semantics still apply; the pre-kill `CTRL_BREAK_EVENT` step in
501    /// [`Self::terminate`] is what reaches the rest of the console process group there. On other
502    /// Tokio-supported platforms there is no library-managed process-group setup, so `start_kill`
503    /// targets the child directly.
504    pub(super) fn start_kill_process_group(
505        child: &mut tokio::process::Child,
506    ) -> Result<(), io::Error> {
507        #[cfg(unix)]
508        {
509            match child.id() {
510                Some(pid) => crate::signal::send_kill_to_process_group(pid),
511                // Already reaped. Tokio's start_kill would have surfaced this as an error;
512                // matching its behavior keeps the caller paths identical.
513                None => child.start_kill(),
514            }
515        }
516        #[cfg(not(unix))]
517        {
518            child.start_kill()
519        }
520    }
521}
522
523// Graceful-termination methods. Only available on Unix and Windows because they rely on platform
524// signal primitives that have no cross-platform analogue.
525#[cfg(any(unix, windows))]
526impl<Stdout, Stderr> ProcessHandle<Stdout, Stderr>
527where
528    Stdout: OutputStream,
529    Stderr: OutputStream,
530{
531    /// Manually send `SIGINT` to this process's process group via `killpg`.
532    ///
533    /// `SIGINT` is the dedicated user-interrupt signal, distinct from the `SIGTERM` delivered by
534    /// [`Self::send_terminate_signal`]. The signal targets the child's process group, so any
535    /// grandchildren the child has fork-execed are signaled together with the leader.
536    ///
537    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
538    /// attempting to signal a stale PID or process group. If the signal send fails because the
539    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
540    ///
541    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
542    ///
543    /// This method is Unix-only because Windows has no targetable `SIGINT` analogue:
544    /// `GenerateConsoleCtrlEvent` only accepts `CTRL_BREAK_EVENT` for nonzero process groups.
545    /// On Windows, use `send_ctrl_break_signal` instead.
546    ///
547    /// # Errors
548    ///
549    /// Returns [`TerminationError`] if the process status could not be checked or if `SIGINT`
550    /// could not be sent.
551    #[cfg(unix)]
552    pub fn send_interrupt_signal(&mut self) -> Result<(), TerminationError> {
553        self.send_signal_with_preflight_reap(
554            GracefulTerminationPhase::Interrupt,
555            INTERRUPT_LABEL,
556            crate::signal::send_interrupt,
557        )
558    }
559
560    /// Manually send `SIGTERM` to this process's process group via `killpg`.
561    ///
562    /// `SIGTERM` is the conventional "asked to terminate" signal sent by service supervisors and
563    /// the operating system at shutdown. The signal targets the child's process group, so any
564    /// grandchildren the child has fork-execed are signaled together with the leader.
565    ///
566    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
567    /// attempting to signal a stale PID or process group. If the signal send fails because the
568    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
569    ///
570    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
571    ///
572    /// This method is Unix-only because Windows has no targetable `SIGTERM` analogue:
573    /// `GenerateConsoleCtrlEvent` only accepts `CTRL_BREAK_EVENT` for nonzero process groups.
574    /// On Windows, use `send_ctrl_break_signal` instead.
575    ///
576    /// # Errors
577    ///
578    /// Returns [`TerminationError`] if the process status could not be checked or if `SIGTERM`
579    /// could not be sent.
580    #[cfg(unix)]
581    pub fn send_terminate_signal(&mut self) -> Result<(), TerminationError> {
582        self.send_signal_with_preflight_reap(
583            GracefulTerminationPhase::Terminate,
584            TERMINATE_LABEL,
585            crate::signal::send_terminate,
586        )
587    }
588
589    /// Manually deliver `CTRL_BREAK_EVENT` to this process's console process group via
590    /// `GenerateConsoleCtrlEvent`.
591    ///
592    /// `CTRL_BREAK_EVENT` is the only console control event that can be targeted at a nonzero
593    /// process group: `CTRL_C_EVENT` requires `dwProcessGroupId = 0` and would be broadcast to
594    /// every process sharing the calling console (including the parent), so it is not usable to
595    /// terminate a single child group. There is therefore no separate `SIGINT` vs. `SIGTERM`
596    /// distinction on Windows; this single method covers the entire graceful-shutdown surface.
597    ///
598    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
599    /// attempting to signal a stale PID or process group. If the signal send fails because the
600    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
601    ///
602    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
603    ///
604    /// This method is Windows-only. On Unix, use `send_interrupt_signal` or
605    /// `send_terminate_signal` instead.
606    ///
607    /// # Errors
608    ///
609    /// Returns [`TerminationError`] if the process status could not be checked or if
610    /// `CTRL_BREAK_EVENT` could not be delivered.
611    #[cfg(windows)]
612    pub fn send_ctrl_break_signal(&mut self) -> Result<(), TerminationError> {
613        self.send_signal_with_preflight_reap(
614            GracefulTerminationPhase::Terminate,
615            GRACEFUL_LABEL,
616            crate::signal::send_ctrl_break,
617        )
618    }
619
620    /// Terminates this process by sending platform graceful shutdown signals first, then killing
621    /// the process if it does not complete after receiving them.
622    ///
623    /// The signature is the same on every supported platform; the shape of `timeouts` is
624    /// platform-conditional. See [`GracefulTimeouts`] for how to construct one.
625    ///
626    /// - On Unix this is a 3-phase escalation: `SIGINT` -> wait `timeouts.interrupt_timeout` ->
627    ///   `SIGTERM` -> wait `timeouts.terminate_timeout` -> `SIGKILL`. The two distinct graceful
628    ///   signals matter in practice: idiomatic async Rust binaries use `tokio::signal::ctrl_c()`
629    ///   (which on Unix listens only for `SIGINT`), and Python child processes turn `SIGINT`
630    ///   into a `KeyboardInterrupt` exception that runs `try/finally` cleanup, while `SIGTERM`
631    ///   falls through to the runtime's default handler.
632    /// - On Windows this is a 2-phase termination: `CTRL_BREAK_EVENT` -> wait
633    ///   `timeouts.graceful_timeout` -> `TerminateProcess`. **Only one `CTRL_BREAK_EVENT` is
634    ///   ever sent.** `GenerateConsoleCtrlEvent` can only target a child's process group with
635    ///   `CTRL_BREAK_EVENT` (sending `CTRL_C_EVENT` would require `dwProcessGroupId = 0` and
636    ///   broadcast to the parent), so a second graceful send would be the same event and cannot
637    ///   do more than the first send already did.
638    ///
639    /// The forceful kill fallback adds one fixed 3-second wait on top of the graceful timeouts.
640    ///
641    /// # Windows interop note
642    ///
643    /// `tokio::signal::ctrl_c()` on Windows registers only for `CTRL_C_EVENT`; it does not catch
644    /// `CTRL_BREAK_EVENT`. A child Rust binary that listens only on the cross-platform
645    /// `tokio::signal::ctrl_c()` will not respond to this graceful step on Windows and will be
646    /// terminated forcefully after `graceful_timeout`. To interoperate, such a child should
647    /// additionally listen on `tokio::signal::windows::ctrl_break()`, or expose another
648    /// shutdown channel (stdin sentinel, IPC, or a command protocol).
649    ///
650    /// # Timeouts
651    ///
652    /// Each per-phase timeout in `timeouts` bounds the post-signal wait of its phase:
653    ///
654    /// - Signal send succeeds: wait up to the user-supplied timeout, then escalate.
655    /// - Signal send fails: replace the user timeout with a fixed 100 ms grace so Tokio's
656    ///   reaper can catch up to a child that just exited (the OS rejects signals to a not-yet-
657    ///   reaped process group with `EPERM` on macOS or `ESRCH` on Linux). Real permission
658    ///   denials still surface as an error after the grace elapses.
659    ///
660    /// `Duration::from_secs(0)` disables the post-signal wait entirely and effectively forces
661    /// the call into the forceful kill (`SIGKILL` on Unix, `TerminateProcess` on Windows).
662    /// Prefer small but non-zero values (e.g. 100 ms to a few seconds).
663    ///
664    /// # Drop guards on `Ok` vs `Err`
665    ///
666    /// On `Ok`, the drop cleanup and panic guards are disarmed and the handle can be dropped
667    /// safely. On `Err` (or if the future is canceled), the guards stay armed: the library cannot
668    /// verify cleanup from the outside, so dropping would leak a process. Recover by retrying
669    /// `terminate`, escalating to [`kill`](Self::kill), calling
670    /// [`must_not_be_terminated`](Self::must_not_be_terminated) to accept the failure, or
671    /// propagating the error and letting the panic-on-drop surface the leak.
672    ///
673    /// # Errors
674    ///
675    /// Returns [`TerminationError`] if signalling or waiting for process termination fails.
676    pub async fn terminate(
677        &mut self,
678        timeouts: GracefulTimeouts,
679    ) -> Result<ExitStatus, TerminationError> {
680        self.terminate_detailed(timeouts)
681            .await
682            .map(|outcome| outcome.exit_status)
683    }
684
685    pub(crate) async fn terminate_detailed(
686        &mut self,
687        timeouts: GracefulTimeouts,
688    ) -> Result<TerminationOutcome, TerminationError> {
689        #[cfg(unix)]
690        {
691            self.terminate_inner_with_preflight_reaper(
692                timeouts,
693                Self::try_reap_exit_status,
694                Self::send_interrupt_signal_raw,
695                Self::send_terminate_signal_raw,
696            )
697            .await
698        }
699        #[cfg(windows)]
700        {
701            self.terminate_inner_with_preflight_reaper(
702                timeouts,
703                Self::try_reap_exit_status,
704                Self::send_ctrl_break_signal_raw,
705            )
706            .await
707        }
708    }
709
710    #[cfg(all(test, unix))]
711    async fn terminate_inner<InterruptSignalSender, TerminateSignalSender>(
712        &mut self,
713        interrupt_timeout: Duration,
714        terminate_timeout: Duration,
715        send_interrupt_signal: InterruptSignalSender,
716        send_terminate_signal: TerminateSignalSender,
717    ) -> Result<ExitStatus, TerminationError>
718    where
719        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
720        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
721    {
722        self.terminate_inner_detailed(
723            interrupt_timeout,
724            terminate_timeout,
725            send_interrupt_signal,
726            send_terminate_signal,
727        )
728        .await
729        .map(|outcome| outcome.exit_status)
730    }
731
732    #[cfg(all(test, windows))]
733    async fn terminate_inner<GracefulSignalSender>(
734        &mut self,
735        graceful_timeout: Duration,
736        send_graceful_signal: GracefulSignalSender,
737    ) -> Result<ExitStatus, TerminationError>
738    where
739        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
740    {
741        self.terminate_inner_detailed(graceful_timeout, send_graceful_signal)
742            .await
743            .map(|outcome| outcome.exit_status)
744    }
745
746    #[cfg(all(test, unix))]
747    async fn terminate_inner_detailed<InterruptSignalSender, TerminateSignalSender>(
748        &mut self,
749        interrupt_timeout: Duration,
750        terminate_timeout: Duration,
751        send_interrupt_signal: InterruptSignalSender,
752        send_terminate_signal: TerminateSignalSender,
753    ) -> Result<TerminationOutcome, TerminationError>
754    where
755        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
756        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
757    {
758        self.terminate_inner_with_preflight_reaper(
759            GracefulTimeouts {
760                interrupt_timeout,
761                terminate_timeout,
762            },
763            Self::try_reap_exit_status,
764            send_interrupt_signal,
765            send_terminate_signal,
766        )
767        .await
768    }
769
770    #[cfg(all(test, windows))]
771    async fn terminate_inner_detailed<GracefulSignalSender>(
772        &mut self,
773        graceful_timeout: Duration,
774        send_graceful_signal: GracefulSignalSender,
775    ) -> Result<TerminationOutcome, TerminationError>
776    where
777        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
778    {
779        self.terminate_inner_with_preflight_reaper(
780            GracefulTimeouts { graceful_timeout },
781            Self::try_reap_exit_status,
782            send_graceful_signal,
783        )
784        .await
785    }
786
787    #[cfg(unix)]
788    async fn terminate_inner_with_preflight_reaper<
789        PreflightReaper,
790        InterruptSignalSender,
791        TerminateSignalSender,
792    >(
793        &mut self,
794        timeouts: GracefulTimeouts,
795        mut try_reap_exit_status: PreflightReaper,
796        mut send_interrupt_signal: InterruptSignalSender,
797        mut send_terminate_signal: TerminateSignalSender,
798    ) -> Result<TerminationOutcome, TerminationError>
799    where
800        PreflightReaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
801        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
802        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
803    {
804        let result = 'termination: {
805            let mut diagnostics = TerminationDiagnostics::default();
806
807            match try_reap_exit_status(self) {
808                Ok(Some(exit_status)) => {
809                    break 'termination Ok(TerminationOutcome::graceful_success(exit_status));
810                }
811                Ok(None) => {}
812                Err(err) => {
813                    tracing::warn!(
814                        process = %self.name,
815                        signal = INTERRUPT_LABEL,
816                        error = %err,
817                        "Could not determine process state before termination. Attempting interrupt signal."
818                    );
819                    diagnostics.record_preflight_status_error(err);
820                }
821            }
822            if let Some(exit_status) = self
823                .attempt_graceful_phase(
824                    INTERRUPT_LABEL,
825                    TERMINATE_LABEL,
826                    timeouts.interrupt_timeout,
827                    GracefulTerminationPhase::Interrupt,
828                    &mut diagnostics,
829                    &mut send_interrupt_signal,
830                )
831                .await
832            {
833                break 'termination Ok(exit_status);
834            }
835
836            if let Some(exit_status) = self
837                .attempt_graceful_phase(
838                    TERMINATE_LABEL,
839                    KILL_LABEL,
840                    timeouts.terminate_timeout,
841                    GracefulTerminationPhase::Terminate,
842                    &mut diagnostics,
843                    &mut send_terminate_signal,
844                )
845                .await
846            {
847                break 'termination Ok(exit_status);
848            }
849
850            self.attempt_forceful_kill(diagnostics).await
851        };
852
853        self.disarm_after_successful_termination(result)
854    }
855
856    #[cfg(windows)]
857    async fn terminate_inner_with_preflight_reaper<PreflightReaper, GracefulSignalSender>(
858        &mut self,
859        timeouts: GracefulTimeouts,
860        mut try_reap_exit_status: PreflightReaper,
861        mut send_graceful_signal: GracefulSignalSender,
862    ) -> Result<TerminationOutcome, TerminationError>
863    where
864        PreflightReaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
865        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
866    {
867        let result = 'termination: {
868            let mut diagnostics = TerminationDiagnostics::default();
869
870            match try_reap_exit_status(self) {
871                Ok(Some(exit_status)) => {
872                    break 'termination Ok(TerminationOutcome::graceful_success(exit_status));
873                }
874                Ok(None) => {}
875                Err(err) => {
876                    tracing::warn!(
877                        process = %self.name,
878                        signal = GRACEFUL_LABEL,
879                        error = %err,
880                        "Could not determine process state before termination. Attempting graceful signal."
881                    );
882                    diagnostics.record_preflight_status_error(err);
883                }
884            }
885
886            if let Some(exit_status) = self
887                .attempt_graceful_phase(
888                    GRACEFUL_LABEL,
889                    KILL_LABEL,
890                    timeouts.graceful_timeout,
891                    GracefulTerminationPhase::Terminate,
892                    &mut diagnostics,
893                    &mut send_graceful_signal,
894                )
895                .await
896            {
897                break 'termination Ok(exit_status);
898            }
899
900            self.attempt_forceful_kill(diagnostics).await
901        };
902
903        self.disarm_after_successful_termination(result)
904    }
905
906    fn send_signal_with_preflight_reap<SignalSender>(
907        &mut self,
908        phase: GracefulTerminationPhase,
909        signal_name: &'static str,
910        send_signal: SignalSender,
911    ) -> Result<(), TerminationError>
912    where
913        SignalSender: FnOnce(&tokio::process::Child) -> Result<(), io::Error>,
914    {
915        self.send_signal_with_reaper(phase, signal_name, send_signal, Self::try_reap_exit_status)
916    }
917
918    fn send_signal_with_reaper<SignalSender, Reaper>(
919        &mut self,
920        phase: GracefulTerminationPhase,
921        signal_name: &'static str,
922        send_signal: SignalSender,
923        mut try_reap_exit_status: Reaper,
924    ) -> Result<(), TerminationError>
925    where
926        SignalSender: FnOnce(&tokio::process::Child) -> Result<(), io::Error>,
927        Reaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
928    {
929        let mut diagnostics = TerminationDiagnostics::default();
930
931        match try_reap_exit_status(self) {
932            Ok(Some(_)) => {
933                self.must_not_be_terminated();
934                Ok(())
935            }
936            Ok(None) => match send_signal(&self.child) {
937                Ok(()) => Ok(()),
938                // Sync probe only - the SIGCHLD-grace bounded wait lives on the `terminate()`
939                // path. Keeping this sync avoids making the public `send_*_signal` APIs async.
940                Err(signal_error) => match try_reap_exit_status(self) {
941                    Ok(Some(_)) => {
942                        self.must_not_be_terminated();
943                        Ok(())
944                    }
945                    Ok(None) => {
946                        diagnostics.record_graceful_signal_error(phase, signal_name, signal_error);
947                        Err(diagnostics.into_signal_failed(self.name.clone()))
948                    }
949                    Err(reap_error) => {
950                        diagnostics.record_graceful_signal_error(phase, signal_name, signal_error);
951                        diagnostics.record_graceful_status_error(phase, signal_name, reap_error);
952                        Err(diagnostics.into_signal_failed(self.name.clone()))
953                    }
954                },
955            },
956            Err(status_error) => {
957                diagnostics.record_graceful_status_error(phase, signal_name, status_error);
958                Err(diagnostics.into_signal_failed(self.name.clone()))
959            }
960        }
961    }
962
963    #[cfg(unix)]
964    fn send_interrupt_signal_raw(&mut self) -> Result<(), io::Error> {
965        crate::signal::send_interrupt(&self.child)
966    }
967
968    #[cfg(unix)]
969    fn send_terminate_signal_raw(&mut self) -> Result<(), io::Error> {
970        crate::signal::send_terminate(&self.child)
971    }
972
973    #[cfg(windows)]
974    fn send_ctrl_break_signal_raw(&mut self) -> Result<(), io::Error> {
975        crate::signal::send_ctrl_break(&self.child)
976    }
977
978    fn disarm_after_successful_termination<T>(
979        &mut self,
980        result: Result<T, TerminationError>,
981    ) -> Result<T, TerminationError> {
982        if result.is_ok() {
983            self.must_not_be_terminated();
984        }
985
986        result
987    }
988
989    async fn attempt_graceful_phase<SignalSender>(
990        &mut self,
991        signal_name: &'static str,
992        next_signal_name: &'static str,
993        timeout: Duration,
994        phase: GracefulTerminationPhase,
995        diagnostics: &mut TerminationDiagnostics,
996        send_signal: &mut SignalSender,
997    ) -> Option<TerminationOutcome>
998    where
999        SignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
1000    {
1001        match send_signal(self) {
1002            Ok(()) => {
1003                self.wait_after_graceful_signal(
1004                    signal_name,
1005                    next_signal_name,
1006                    timeout,
1007                    phase,
1008                    diagnostics,
1009                )
1010                .await
1011            }
1012            Err(err) => {
1013                tracing::warn!(
1014                    process = %self.name,
1015                    signal = signal_name,
1016                    next_signal = next_signal_name,
1017                    error = %err,
1018                    "Graceful shutdown signal could not be sent. Attempting next shutdown phase."
1019                );
1020                diagnostics.record_graceful_signal_error(phase, signal_name, err);
1021                self.observe_exit_after_failed_signal(signal_name, phase, diagnostics)
1022                    .await
1023            }
1024        }
1025    }
1026
1027    async fn wait_after_graceful_signal(
1028        &mut self,
1029        signal_name: &'static str,
1030        next_signal_name: &'static str,
1031        timeout: Duration,
1032        phase: GracefulTerminationPhase,
1033        diagnostics: &mut TerminationDiagnostics,
1034    ) -> Option<TerminationOutcome> {
1035        match self.wait_for_exit_after_signal(timeout).await {
1036            Ok(Some(exit_status)) => Some(TerminationOutcome::graceful_success(exit_status)),
1037            Ok(None) => {
1038                let not_terminated = Self::wait_timeout_diagnostic(timeout);
1039                tracing::warn!(
1040                    process = %self.name,
1041                    signal = signal_name,
1042                    next_signal = next_signal_name,
1043                    error = %not_terminated,
1044                    "Graceful shutdown signal timed out. Attempting next shutdown phase."
1045                );
1046                diagnostics.record_graceful_wait_error(phase, signal_name, not_terminated);
1047                None
1048            }
1049            Err(wait_error) => {
1050                tracing::warn!(
1051                    process = %self.name,
1052                    signal = signal_name,
1053                    next_signal = next_signal_name,
1054                    error = %wait_error,
1055                    "Graceful shutdown signal timed out. Attempting next shutdown phase."
1056                );
1057                diagnostics.record_graceful_wait_error(phase, signal_name, wait_error);
1058                None
1059            }
1060        }
1061    }
1062
1063    /// Recovery probe after a graceful signal send failed: waits briefly so a freshly-exited
1064    /// child is observed as exited rather than as still running. See
1065    /// [`REAP_AFTER_SIGNAL_FAILURE_GRACE`].
1066    async fn observe_exit_after_failed_signal(
1067        &mut self,
1068        signal_name: &'static str,
1069        phase: GracefulTerminationPhase,
1070        diagnostics: &mut TerminationDiagnostics,
1071    ) -> Option<TerminationOutcome> {
1072        match self
1073            .wait_for_exit_after_signal(REAP_AFTER_SIGNAL_FAILURE_GRACE)
1074            .await
1075        {
1076            Ok(Some(exit_status)) => Some(TerminationOutcome::graceful_success(exit_status)),
1077            Ok(None) => None,
1078            Err(reap_error) => {
1079                tracing::warn!(
1080                    process = %self.name,
1081                    signal = signal_name,
1082                    error = %reap_error,
1083                    "Could not determine process state after graceful signal send failed."
1084                );
1085                diagnostics.record_graceful_status_error(phase, signal_name, reap_error);
1086                None
1087            }
1088        }
1089    }
1090
1091    async fn attempt_forceful_kill(
1092        &mut self,
1093        mut diagnostics: TerminationDiagnostics,
1094    ) -> Result<TerminationOutcome, TerminationError> {
1095        match Self::start_kill_process_group(&mut self.child) {
1096            Ok(()) => {
1097                // Note: A forceful kill should typically (somewhat) immediately lead to
1098                // termination of the process. But there are cases in which even a forceful kill
1099                // does not / cannot / will not kill a process. We do not want to wait indefinitely
1100                // in case this happens and therefore wait (at max) for a fixed duration after any
1101                // kill.
1102                match self
1103                    .wait_for_exit_after_signal(FORCE_KILL_WAIT_TIMEOUT)
1104                    .await
1105                {
1106                    Ok(Some(exit_status)) => {
1107                        Ok(TerminationOutcome::force_kill_success(exit_status))
1108                    }
1109                    Ok(None) => {
1110                        let not_terminated_after_kill =
1111                            Self::wait_timeout_diagnostic(FORCE_KILL_WAIT_TIMEOUT);
1112                        // Unlikely. See the note above.
1113                        tracing::error!(
1114                            process = %self.name,
1115                            kill_signal = KILL_LABEL,
1116                            "Process did not terminate after all termination attempts. Process may still be running. Manual intervention and investigation required!"
1117                        );
1118                        diagnostics.record_kill_wait_error(not_terminated_after_kill);
1119                        Err(diagnostics.into_termination_failed(self.name.clone()))
1120                    }
1121                    Err(not_terminated_after_kill) => {
1122                        // Unlikely. See the note above.
1123                        tracing::error!(
1124                            process = %self.name,
1125                            kill_signal = KILL_LABEL,
1126                            "Process did not terminate after all termination attempts. Process may still be running. Manual intervention and investigation required!"
1127                        );
1128                        diagnostics.record_kill_wait_error(not_terminated_after_kill);
1129                        Err(diagnostics.into_termination_failed(self.name.clone()))
1130                    }
1131                }
1132            }
1133            Err(kill_error) => {
1134                tracing::error!(
1135                    process = %self.name,
1136                    error = %kill_error,
1137                    signal = KILL_LABEL,
1138                    "Forceful shutdown failed. Process may still be running. Manual intervention required!"
1139                );
1140                diagnostics.record_kill_signal_error(kill_error);
1141
1142                // Brief grace for Tokio's SIGCHLD reaper to catch up - see
1143                // `REAP_AFTER_SIGNAL_FAILURE_GRACE`.
1144                match self
1145                    .wait_for_exit_after_signal(REAP_AFTER_SIGNAL_FAILURE_GRACE)
1146                    .await
1147                {
1148                    Ok(Some(exit_status)) => {
1149                        return Ok(TerminationOutcome::graceful_success(exit_status));
1150                    }
1151                    Ok(None) => {}
1152                    Err(reap_error) => {
1153                        tracing::warn!(
1154                            process = %self.name,
1155                            signal = KILL_LABEL,
1156                            error = %reap_error,
1157                            "Could not determine process state after forceful shutdown failed."
1158                        );
1159                        diagnostics.record_kill_status_error(reap_error);
1160                    }
1161                }
1162
1163                Err(diagnostics.into_termination_failed(self.name.clone()))
1164            }
1165        }
1166    }
1167}
1168
1169#[cfg(test)]
1170mod tests;