Skip to main content

tokio_process_tools/process_handle/termination/
mod.rs

1use super::ProcessHandle;
2use crate::error::{
3    TerminationAttemptError, TerminationAttemptOperation, TerminationAttemptPhase, TerminationError,
4};
5use crate::output_stream::OutputStream;
6use std::borrow::Cow;
7use std::error::Error;
8use std::io;
9use std::process::ExitStatus;
10use std::time::Duration;
11
/// Maximum time to wait for process termination after forceful kill.
///
/// This is a safety timeout since forceful kill should terminate processes immediately,
/// but there are rare cases where even forceful kill may not work.
#[cfg(any(unix, windows))]
const FORCE_KILL_WAIT_TIMEOUT: Duration = Duration::from_secs(3);

/// Grace window granted to Tokio's SIGCHLD reaper after a signal-send failure so a freshly-exited
/// child is observed as exited rather than as still running. Covers the brief race where the OS
/// rejects signals to a not-yet-reaped process group (`EPERM` on macOS, `ESRCH` on Linux).
#[cfg(any(unix, windows))]
const REAP_AFTER_SIGNAL_FAILURE_GRACE: Duration = Duration::from_millis(100);

/// Signal name recorded in diagnostics and log output for the Unix interrupt phase.
#[cfg(unix)]
const INTERRUPT_LABEL: &str = "SIGINT";
/// Signal name recorded in diagnostics for the Unix terminate phase.
#[cfg(unix)]
const TERMINATE_LABEL: &str = "SIGTERM";

/// Signal name recorded in diagnostics and log output for the single Windows graceful phase.
#[cfg(windows)]
const GRACEFUL_LABEL: &str = "CTRL_BREAK_EVENT";

/// Label recorded in diagnostics for the forceful kill phase. Cross-platform because `kill()` is
/// available on every platform Tokio supports (the underlying `Child::start_kill()` is what runs
/// on targets where graceful escalation is unavailable).
#[cfg(unix)]
const KILL_LABEL: &str = "SIGKILL";
/// Windows spelling of the forceful-kill diagnostics label.
#[cfg(windows)]
const KILL_LABEL: &str = "TerminateProcess";
/// Forceful-kill diagnostics label for targets that are neither Unix nor Windows.
#[cfg(not(any(unix, windows)))]
const KILL_LABEL: &str = "kill";
42
43/// Per-platform graceful-shutdown timeout budget passed to [`ProcessHandle::terminate`] and
44/// related APIs.
45///
46/// The shape mirrors the platform's actual graceful-shutdown model. Cross-platform code
47/// constructs the value under cfg gates and then passes it to the cross-platform `terminate(...)`
48/// signature:
49///
50/// ```rust,ignore
51/// use std::time::Duration;
52/// use tokio_process_tools::GracefulTimeouts;
53///
54/// #[cfg(unix)]
55/// let timeouts = GracefulTimeouts {
56///     interrupt_timeout: Duration::from_secs(3),
57///     terminate_timeout: Duration::from_secs(5),
58/// };
59/// #[cfg(windows)]
60/// let timeouts = GracefulTimeouts {
61///     graceful_timeout: Duration::from_secs(8),
62/// };
63///
64/// process.terminate(timeouts).await?;
65/// ```
66///
67/// On Unix the type carries two separate budgets, one per graceful phase
68/// (`SIGINT` then `SIGTERM`). On Windows it carries a single budget because
69/// `GenerateConsoleCtrlEvent` can only target a child's process group with `CTRL_BREAK_EVENT`;
70/// sending the same event a second time cannot do more than the first send already did, so the
71/// Windows `terminate(...)` runs exactly one graceful step.
72///
73/// This type is only available on Unix and Windows because the underlying graceful-shutdown
74/// signals only exist there. On other Tokio-supported targets the spawn, wait,
75/// output-collection, and [`ProcessHandle::kill`] APIs remain available; only the
76/// graceful-termination surface (`terminate(...)`, `terminate_on_drop(...)`,
77/// `wait_for_completion_or_terminate(...)`, the `send_*_signal(...)` methods, and this type) is
78/// gated out.
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GracefulTimeouts {
    /// How long to wait for exit after `SIGINT` before `SIGTERM` is sent.
    #[cfg(unix)]
    pub interrupt_timeout: Duration,
    /// How long to wait for exit after `SIGTERM` before `SIGKILL` is sent.
    #[cfg(unix)]
    pub terminate_timeout: Duration,
    /// How long to wait for exit after `CTRL_BREAK_EVENT` before `TerminateProcess` is
    /// called.
    #[cfg(windows)]
    pub graceful_timeout: Duration,
}

#[cfg(any(unix, windows))]
impl GracefulTimeouts {
    /// Sum of every graceful-phase budget, used for downstream output-collection deadlines.
    ///
    /// On Unix the two phase budgets are added with saturation, so an oversized pair clamps
    /// to `Duration::MAX` instead of overflowing. On Windows the single budget is returned
    /// unchanged.
    pub(crate) fn total(self) -> Duration {
        #[cfg(unix)]
        {
            let Self {
                interrupt_timeout,
                terminate_timeout,
            } = self;
            interrupt_timeout.saturating_add(terminate_timeout)
        }
        #[cfg(windows)]
        {
            let Self { graceful_timeout } = self;
            graceful_timeout
        }
    }
}
109
110#[cfg(any(unix, windows))]
111#[derive(Debug, Clone, Copy, PartialEq, Eq)]
112pub(crate) struct TerminationOutcome {
113    pub(crate) exit_status: ExitStatus,
114    pub(crate) output_collection_timeout_extension: Duration,
115}
116
117#[cfg(any(unix, windows))]
118impl TerminationOutcome {
119    fn graceful_success(exit_status: ExitStatus) -> Self {
120        Self {
121            exit_status,
122            output_collection_timeout_extension: Duration::ZERO,
123        }
124    }
125
126    fn force_kill_success(exit_status: ExitStatus) -> Self {
127        Self {
128            exit_status,
129            output_collection_timeout_extension: FORCE_KILL_WAIT_TIMEOUT,
130        }
131    }
132}
133
/// Graceful (pre-kill) phase of a termination attempt, used to tag recorded diagnostics.
#[cfg(any(unix, windows))]
#[derive(Debug, Clone, Copy)]
enum GracefulTerminationPhase {
    /// The `SIGINT` phase; this variant only exists on Unix.
    #[cfg(unix)]
    Interrupt,
    /// The `SIGTERM` phase on Unix; on Windows this variant is also used for the
    /// `CTRL_BREAK_EVENT` phase.
    Terminate,
}

#[cfg(any(unix, windows))]
impl GracefulTerminationPhase {
    /// Maps this phase to the matching [`TerminationAttemptPhase`] variant.
    fn attempt_phase(self) -> TerminationAttemptPhase {
        match self {
            // Gated like the variant itself so the match stays exhaustive on Windows.
            #[cfg(unix)]
            Self::Interrupt => TerminationAttemptPhase::Interrupt,
            Self::Terminate => TerminationAttemptPhase::Terminate,
        }
    }
}
152
153#[derive(Debug, Default)]
154struct TerminationDiagnostics {
155    attempt_errors: Vec<TerminationAttemptError>,
156}
157
158impl TerminationDiagnostics {
159    #[cfg(any(unix, windows))]
160    fn record_preflight_status_error(&mut self, error: impl Error + Send + Sync + 'static) {
161        self.record(
162            TerminationAttemptPhase::Preflight,
163            TerminationAttemptOperation::CheckStatus,
164            None,
165            error,
166        );
167    }
168
169    #[cfg(any(unix, windows))]
170    fn record_graceful_signal_error(
171        &mut self,
172        phase: GracefulTerminationPhase,
173        signal_name: &'static str,
174        error: impl Error + Send + Sync + 'static,
175    ) {
176        self.record(
177            phase.attempt_phase(),
178            TerminationAttemptOperation::SendSignal,
179            Some(signal_name),
180            error,
181        );
182    }
183
184    #[cfg(any(unix, windows))]
185    fn record_graceful_wait_error(
186        &mut self,
187        phase: GracefulTerminationPhase,
188        signal_name: &'static str,
189        error: impl Error + Send + Sync + 'static,
190    ) {
191        self.record(
192            phase.attempt_phase(),
193            TerminationAttemptOperation::WaitForExit,
194            Some(signal_name),
195            error,
196        );
197    }
198
199    #[cfg(any(unix, windows))]
200    fn record_graceful_status_error(
201        &mut self,
202        phase: GracefulTerminationPhase,
203        signal_name: &'static str,
204        error: impl Error + Send + Sync + 'static,
205    ) {
206        self.record(
207            phase.attempt_phase(),
208            TerminationAttemptOperation::CheckStatus,
209            Some(signal_name),
210            error,
211        );
212    }
213
214    fn record_kill_signal_error(&mut self, error: impl Error + Send + Sync + 'static) {
215        self.record(
216            TerminationAttemptPhase::Kill,
217            TerminationAttemptOperation::SendSignal,
218            Some(KILL_LABEL),
219            error,
220        );
221    }
222
223    fn record_kill_wait_error(&mut self, error: impl Error + Send + Sync + 'static) {
224        self.record(
225            TerminationAttemptPhase::Kill,
226            TerminationAttemptOperation::WaitForExit,
227            Some(KILL_LABEL),
228            error,
229        );
230    }
231
232    #[cfg(any(unix, windows))]
233    fn record_kill_status_error(&mut self, error: impl Error + Send + Sync + 'static) {
234        self.record(
235            TerminationAttemptPhase::Kill,
236            TerminationAttemptOperation::CheckStatus,
237            Some(KILL_LABEL),
238            error,
239        );
240    }
241
242    fn record(
243        &mut self,
244        phase: TerminationAttemptPhase,
245        operation: TerminationAttemptOperation,
246        signal_name: Option<&'static str>,
247        error: impl Error + Send + Sync + 'static,
248    ) {
249        self.attempt_errors.push(TerminationAttemptError {
250            phase,
251            operation,
252            signal_name,
253            source: Box::new(error),
254        });
255    }
256
257    #[must_use]
258    fn into_termination_failed(self, process_name: Cow<'static, str>) -> TerminationError {
259        assert!(
260            !self.attempt_errors.is_empty(),
261            "into_termination_failed must not be used when no error was recorded!",
262        );
263
264        TerminationError::TerminationFailed {
265            process_name,
266            attempt_errors: self.attempt_errors,
267        }
268    }
269
270    #[cfg(any(unix, windows))]
271    #[must_use]
272    fn into_signal_failed(self, process_name: Cow<'static, str>) -> TerminationError {
273        assert!(
274            !self.attempt_errors.is_empty(),
275            "into_signal_failed must not be used when no error was recorded!",
276        );
277
278        TerminationError::SignalFailed {
279            process_name,
280            attempt_errors: self.attempt_errors,
281        }
282    }
283}
284
285// Cross-platform termination methods. `kill()` and the Drop best-effort cleanup work on every
286// Tokio-supported platform via `tokio::process::Child::start_kill()`, so they stay available
287// even on targets where graceful-termination escalation is not.
288impl<Stdout, Stderr> ProcessHandle<Stdout, Stderr>
289where
290    Stdout: OutputStream,
291    Stderr: OutputStream,
292{
293    /// Forces the process to exit. Most users should call [`ProcessHandle::terminate`] instead.
294    ///
295    /// This is equivalent to sending `SIGKILL` on Unix or calling `TerminateProcess` on Windows,
296    /// followed by wait. On other Tokio-supported platforms it forwards to
297    /// [`tokio::process::Child::start_kill`].
298    /// Any still-open stdin handle is closed before Tokio performs that kill-and-wait sequence,
299    /// matching [`tokio::process::Child::kill`] semantics.
300    /// A successful call waits for the child to exit and disarms the drop cleanup and panic guards,
301    /// so the handle can be dropped safely afterward.
302    ///
303    /// `kill` is a reasonable next step when [`terminate`](Self::terminate) returns `Err` and the
304    /// caller is not interested in further graceful escalation.
305    ///
306    /// # Errors
307    ///
308    /// Returns [`TerminationError`] if Tokio cannot kill or wait for the child process.
309    pub async fn kill(&mut self) -> Result<(), TerminationError> {
310        self.kill_inner(Self::start_kill_raw).await
311    }
312
313    pub(super) async fn kill_inner<StartKill>(
314        &mut self,
315        mut start_kill: StartKill,
316    ) -> Result<(), TerminationError>
317    where
318        StartKill: FnMut(&mut Self) -> Result<(), io::Error>,
319    {
320        self.stdin().close();
321        let mut diagnostics = TerminationDiagnostics::default();
322
323        if let Err(err) = start_kill(self) {
324            diagnostics.record_kill_signal_error(err);
325            return Err(diagnostics.into_termination_failed(self.name.clone()));
326        }
327
328        if let Err(err) = self.wait_for_completion_unbounded_inner().await {
329            diagnostics.record_kill_wait_error(err);
330            return Err(diagnostics.into_termination_failed(self.name.clone()));
331        }
332
333        Ok(())
334    }
335
336    pub(super) fn start_kill_raw(&mut self) -> Result<(), io::Error> {
337        Self::start_kill_process_group(&mut self.child)
338    }
339
340    /// Sends `SIGKILL` to the child's process group on Unix and forwards to Tokio's
341    /// `Child::start_kill` everywhere else.
342    ///
343    /// On Unix the child is the leader of a process group set up at spawn time, so targeting the
344    /// group reaches any grandchildren the child has fork-execed. Tokio's stock `start_kill`
345    /// targets only the child's PID and would orphan that subtree. On Windows the standard
346    /// `TerminateProcess` semantics still apply; the pre-kill `CTRL_BREAK_EVENT` step in
347    /// [`Self::terminate`] is what reaches the rest of the console process group there. On other
348    /// Tokio-supported platforms there is no library-managed process-group setup, so `start_kill`
349    /// targets the child directly.
350    pub(super) fn start_kill_process_group(
351        child: &mut tokio::process::Child,
352    ) -> Result<(), io::Error> {
353        #[cfg(unix)]
354        {
355            match child.id() {
356                Some(pid) => crate::signal::send_kill_to_process_group(pid),
357                // Already reaped. Tokio's start_kill would have surfaced this as an error;
358                // matching its behavior keeps the caller paths identical.
359                None => child.start_kill(),
360            }
361        }
362        #[cfg(not(unix))]
363        {
364            child.start_kill()
365        }
366    }
367}
368
369// Graceful-termination methods. Only available on Unix and Windows because they rely on platform
370// signal primitives that have no cross-platform analogue.
371#[cfg(any(unix, windows))]
372impl<Stdout, Stderr> ProcessHandle<Stdout, Stderr>
373where
374    Stdout: OutputStream,
375    Stderr: OutputStream,
376{
377    /// Manually send `SIGINT` to this process's process group via `killpg`.
378    ///
379    /// `SIGINT` is the dedicated user-interrupt signal, distinct from the `SIGTERM` delivered by
380    /// [`Self::send_terminate_signal`]. The signal targets the child's process group, so any
381    /// grandchildren the child has fork-execed are signaled together with the leader.
382    ///
383    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
384    /// attempting to signal a stale PID or process group. If the signal send fails because the
385    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
386    ///
387    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
388    ///
389    /// This method is Unix-only because Windows has no targetable `SIGINT` analogue:
390    /// `GenerateConsoleCtrlEvent` only accepts `CTRL_BREAK_EVENT` for nonzero process groups.
391    /// On Windows, use `send_ctrl_break_signal` instead.
392    ///
393    /// # Errors
394    ///
395    /// Returns [`TerminationError`] if the process status could not be checked or if `SIGINT`
396    /// could not be sent.
397    #[cfg(unix)]
398    pub fn send_interrupt_signal(&mut self) -> Result<(), TerminationError> {
399        self.send_signal_with_preflight_reap(
400            GracefulTerminationPhase::Interrupt,
401            INTERRUPT_LABEL,
402            crate::signal::send_interrupt,
403        )
404    }
405
406    /// Manually send `SIGTERM` to this process's process group via `killpg`.
407    ///
408    /// `SIGTERM` is the conventional "asked to terminate" signal sent by service supervisors and
409    /// the operating system at shutdown. The signal targets the child's process group, so any
410    /// grandchildren the child has fork-execed are signaled together with the leader.
411    ///
412    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
413    /// attempting to signal a stale PID or process group. If the signal send fails because the
414    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
415    ///
416    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
417    ///
418    /// This method is Unix-only because Windows has no targetable `SIGTERM` analogue:
419    /// `GenerateConsoleCtrlEvent` only accepts `CTRL_BREAK_EVENT` for nonzero process groups.
420    /// On Windows, use `send_ctrl_break_signal` instead.
421    ///
422    /// # Errors
423    ///
424    /// Returns [`TerminationError`] if the process status could not be checked or if `SIGTERM`
425    /// could not be sent.
426    #[cfg(unix)]
427    pub fn send_terminate_signal(&mut self) -> Result<(), TerminationError> {
428        self.send_signal_with_preflight_reap(
429            GracefulTerminationPhase::Terminate,
430            TERMINATE_LABEL,
431            crate::signal::send_terminate,
432        )
433    }
434
435    /// Manually deliver `CTRL_BREAK_EVENT` to this process's console process group via
436    /// `GenerateConsoleCtrlEvent`.
437    ///
438    /// `CTRL_BREAK_EVENT` is the only console control event that can be targeted at a nonzero
439    /// process group: `CTRL_C_EVENT` requires `dwProcessGroupId = 0` and would be broadcast to
440    /// every process sharing the calling console (including the parent), so it is not usable to
441    /// terminate a single child group. There is therefore no separate `SIGINT` vs. `SIGTERM`
442    /// distinction on Windows; this single method covers the entire graceful-shutdown surface.
443    ///
444    /// If the process has already exited, this reaps it and returns `Ok(())` instead of
445    /// attempting to signal a stale PID or process group. If the signal send fails because the
446    /// child exited after the preflight check, this also reaps it and returns `Ok(())`.
447    ///
448    /// Prefer to call `terminate` instead, if you want to make sure this process is terminated.
449    ///
450    /// This method is Windows-only. On Unix, use `send_interrupt_signal` or
451    /// `send_terminate_signal` instead.
452    ///
453    /// # Errors
454    ///
455    /// Returns [`TerminationError`] if the process status could not be checked or if
456    /// `CTRL_BREAK_EVENT` could not be delivered.
457    #[cfg(windows)]
458    pub fn send_ctrl_break_signal(&mut self) -> Result<(), TerminationError> {
459        self.send_signal_with_preflight_reap(
460            GracefulTerminationPhase::Terminate,
461            GRACEFUL_LABEL,
462            crate::signal::send_ctrl_break,
463        )
464    }
465
466    /// Terminates this process by sending platform graceful shutdown signals first, then killing
467    /// the process if it does not complete after receiving them.
468    ///
469    /// The signature is the same on every supported platform; the shape of `timeouts` is
470    /// platform-conditional. See [`GracefulTimeouts`] for how to construct one.
471    ///
472    /// - On Unix this is a 3-phase escalation: `SIGINT` -> wait `timeouts.interrupt_timeout` ->
473    ///   `SIGTERM` -> wait `timeouts.terminate_timeout` -> `SIGKILL`. The two distinct graceful
474    ///   signals matter in practice: idiomatic async Rust binaries use `tokio::signal::ctrl_c()`
475    ///   (which on Unix listens only for `SIGINT`), and Python child processes turn `SIGINT`
476    ///   into a `KeyboardInterrupt` exception that runs `try/finally` cleanup, while `SIGTERM`
477    ///   falls through to the runtime's default handler.
478    /// - On Windows this is a 2-phase termination: `CTRL_BREAK_EVENT` -> wait
479    ///   `timeouts.graceful_timeout` -> `TerminateProcess`. **Only one `CTRL_BREAK_EVENT` is
480    ///   ever sent.** `GenerateConsoleCtrlEvent` can only target a child's process group with
481    ///   `CTRL_BREAK_EVENT` (sending `CTRL_C_EVENT` would require `dwProcessGroupId = 0` and
482    ///   broadcast to the parent), so a second graceful send would be the same event and cannot
483    ///   do more than the first send already did.
484    ///
485    /// The forceful kill fallback adds one fixed 3-second wait on top of the graceful timeouts.
486    ///
487    /// # Windows interop note
488    ///
489    /// `tokio::signal::ctrl_c()` on Windows registers only for `CTRL_C_EVENT`; it does not catch
490    /// `CTRL_BREAK_EVENT`. A child Rust binary that listens only on the cross-platform
491    /// `tokio::signal::ctrl_c()` will not respond to this graceful step on Windows and will be
492    /// terminated forcefully after `graceful_timeout`. To interoperate, such a child should
493    /// additionally listen on `tokio::signal::windows::ctrl_break()`, or expose another
494    /// shutdown channel (stdin sentinel, IPC, or a command protocol).
495    ///
496    /// # Timeouts
497    ///
498    /// Each per-phase timeout in `timeouts` bounds the post-signal wait of its phase:
499    ///
500    /// - Signal send succeeds: wait up to the user-supplied timeout, then escalate.
501    /// - Signal send fails: replace the user timeout with a fixed 100 ms grace so Tokio's
502    ///   reaper can catch up to a child that just exited (the OS rejects signals to a not-yet-
503    ///   reaped process group with `EPERM` on macOS or `ESRCH` on Linux). Real permission
504    ///   denials still surface as an error after the grace elapses.
505    ///
506    /// `Duration::from_secs(0)` disables the post-signal wait entirely and effectively forces
507    /// the call into the forceful kill (`SIGKILL` on Unix, `TerminateProcess` on Windows).
508    /// Prefer small but non-zero values (e.g. 100 ms to a few seconds).
509    ///
510    /// # Drop guards on `Ok` vs `Err`
511    ///
512    /// On `Ok`, the drop cleanup and panic guards are disarmed and the handle can be dropped
513    /// safely. On `Err` (or if the future is canceled), the guards stay armed: the library cannot
514    /// verify cleanup from the outside, so dropping would leak a process. Recover by retrying
515    /// `terminate`, escalating to [`kill`](Self::kill), calling
516    /// [`must_not_be_terminated`](Self::must_not_be_terminated) to accept the failure, or
517    /// propagating the error and letting the panic-on-drop surface the leak.
518    ///
519    /// # Errors
520    ///
521    /// Returns [`TerminationError`] if signalling or waiting for process termination fails.
522    pub async fn terminate(
523        &mut self,
524        timeouts: GracefulTimeouts,
525    ) -> Result<ExitStatus, TerminationError> {
526        self.terminate_detailed(timeouts)
527            .await
528            .map(|outcome| outcome.exit_status)
529    }
530
531    pub(crate) async fn terminate_detailed(
532        &mut self,
533        timeouts: GracefulTimeouts,
534    ) -> Result<TerminationOutcome, TerminationError> {
535        #[cfg(unix)]
536        {
537            self.terminate_inner_with_preflight_reaper(
538                timeouts,
539                Self::try_reap_exit_status,
540                Self::send_interrupt_signal_raw,
541                Self::send_terminate_signal_raw,
542            )
543            .await
544        }
545        #[cfg(windows)]
546        {
547            self.terminate_inner_with_preflight_reaper(
548                timeouts,
549                Self::try_reap_exit_status,
550                Self::send_ctrl_break_signal_raw,
551            )
552            .await
553        }
554    }
555
556    #[cfg(all(test, unix))]
557    async fn terminate_inner<InterruptSignalSender, TerminateSignalSender>(
558        &mut self,
559        interrupt_timeout: Duration,
560        terminate_timeout: Duration,
561        send_interrupt_signal: InterruptSignalSender,
562        send_terminate_signal: TerminateSignalSender,
563    ) -> Result<ExitStatus, TerminationError>
564    where
565        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
566        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
567    {
568        self.terminate_inner_detailed(
569            interrupt_timeout,
570            terminate_timeout,
571            send_interrupt_signal,
572            send_terminate_signal,
573        )
574        .await
575        .map(|outcome| outcome.exit_status)
576    }
577
578    #[cfg(all(test, windows))]
579    async fn terminate_inner<GracefulSignalSender>(
580        &mut self,
581        graceful_timeout: Duration,
582        send_graceful_signal: GracefulSignalSender,
583    ) -> Result<ExitStatus, TerminationError>
584    where
585        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
586    {
587        self.terminate_inner_detailed(graceful_timeout, send_graceful_signal)
588            .await
589            .map(|outcome| outcome.exit_status)
590    }
591
592    #[cfg(all(test, unix))]
593    async fn terminate_inner_detailed<InterruptSignalSender, TerminateSignalSender>(
594        &mut self,
595        interrupt_timeout: Duration,
596        terminate_timeout: Duration,
597        send_interrupt_signal: InterruptSignalSender,
598        send_terminate_signal: TerminateSignalSender,
599    ) -> Result<TerminationOutcome, TerminationError>
600    where
601        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
602        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
603    {
604        self.terminate_inner_with_preflight_reaper(
605            GracefulTimeouts {
606                interrupt_timeout,
607                terminate_timeout,
608            },
609            Self::try_reap_exit_status,
610            send_interrupt_signal,
611            send_terminate_signal,
612        )
613        .await
614    }
615
616    #[cfg(all(test, windows))]
617    async fn terminate_inner_detailed<GracefulSignalSender>(
618        &mut self,
619        graceful_timeout: Duration,
620        send_graceful_signal: GracefulSignalSender,
621    ) -> Result<TerminationOutcome, TerminationError>
622    where
623        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
624    {
625        self.terminate_inner_with_preflight_reaper(
626            GracefulTimeouts { graceful_timeout },
627            Self::try_reap_exit_status,
628            send_graceful_signal,
629        )
630        .await
631    }
632
633    #[cfg(unix)]
634    async fn terminate_inner_with_preflight_reaper<
635        PreflightReaper,
636        InterruptSignalSender,
637        TerminateSignalSender,
638    >(
639        &mut self,
640        timeouts: GracefulTimeouts,
641        mut try_reap_exit_status: PreflightReaper,
642        mut send_interrupt_signal: InterruptSignalSender,
643        mut send_terminate_signal: TerminateSignalSender,
644    ) -> Result<TerminationOutcome, TerminationError>
645    where
646        PreflightReaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
647        InterruptSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
648        TerminateSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
649    {
650        let result = 'termination: {
651            let mut diagnostics = TerminationDiagnostics::default();
652
653            match try_reap_exit_status(self) {
654                Ok(Some(exit_status)) => {
655                    break 'termination Ok(TerminationOutcome::graceful_success(exit_status));
656                }
657                Ok(None) => {}
658                Err(err) => {
659                    tracing::warn!(
660                        process = %self.name,
661                        signal = INTERRUPT_LABEL,
662                        error = %err,
663                        "Could not determine process state before termination. Attempting interrupt signal."
664                    );
665                    diagnostics.record_preflight_status_error(err);
666                }
667            }
668            if let Some(exit_status) = self
669                .attempt_graceful_phase(
670                    INTERRUPT_LABEL,
671                    TERMINATE_LABEL,
672                    timeouts.interrupt_timeout,
673                    GracefulTerminationPhase::Interrupt,
674                    &mut diagnostics,
675                    &mut send_interrupt_signal,
676                )
677                .await
678            {
679                break 'termination Ok(exit_status);
680            }
681
682            if let Some(exit_status) = self
683                .attempt_graceful_phase(
684                    TERMINATE_LABEL,
685                    KILL_LABEL,
686                    timeouts.terminate_timeout,
687                    GracefulTerminationPhase::Terminate,
688                    &mut diagnostics,
689                    &mut send_terminate_signal,
690                )
691                .await
692            {
693                break 'termination Ok(exit_status);
694            }
695
696            self.attempt_forceful_kill(diagnostics).await
697        };
698
699        self.disarm_after_successful_termination(result)
700    }
701
702    #[cfg(windows)]
703    async fn terminate_inner_with_preflight_reaper<PreflightReaper, GracefulSignalSender>(
704        &mut self,
705        timeouts: GracefulTimeouts,
706        mut try_reap_exit_status: PreflightReaper,
707        mut send_graceful_signal: GracefulSignalSender,
708    ) -> Result<TerminationOutcome, TerminationError>
709    where
710        PreflightReaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
711        GracefulSignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
712    {
713        let result = 'termination: {
714            let mut diagnostics = TerminationDiagnostics::default();
715
716            match try_reap_exit_status(self) {
717                Ok(Some(exit_status)) => {
718                    break 'termination Ok(TerminationOutcome::graceful_success(exit_status));
719                }
720                Ok(None) => {}
721                Err(err) => {
722                    tracing::warn!(
723                        process = %self.name,
724                        signal = GRACEFUL_LABEL,
725                        error = %err,
726                        "Could not determine process state before termination. Attempting graceful signal."
727                    );
728                    diagnostics.record_preflight_status_error(err);
729                }
730            }
731
732            if let Some(exit_status) = self
733                .attempt_graceful_phase(
734                    GRACEFUL_LABEL,
735                    KILL_LABEL,
736                    timeouts.graceful_timeout,
737                    GracefulTerminationPhase::Terminate,
738                    &mut diagnostics,
739                    &mut send_graceful_signal,
740                )
741                .await
742            {
743                break 'termination Ok(exit_status);
744            }
745
746            self.attempt_forceful_kill(diagnostics).await
747        };
748
749        self.disarm_after_successful_termination(result)
750    }
751
752    fn send_signal_with_preflight_reap<SignalSender>(
753        &mut self,
754        phase: GracefulTerminationPhase,
755        signal_name: &'static str,
756        send_signal: SignalSender,
757    ) -> Result<(), TerminationError>
758    where
759        SignalSender: FnOnce(&tokio::process::Child) -> Result<(), io::Error>,
760    {
761        self.send_signal_with_reaper(phase, signal_name, send_signal, Self::try_reap_exit_status)
762    }
763
764    fn send_signal_with_reaper<SignalSender, Reaper>(
765        &mut self,
766        phase: GracefulTerminationPhase,
767        signal_name: &'static str,
768        send_signal: SignalSender,
769        mut try_reap_exit_status: Reaper,
770    ) -> Result<(), TerminationError>
771    where
772        SignalSender: FnOnce(&tokio::process::Child) -> Result<(), io::Error>,
773        Reaper: FnMut(&mut Self) -> Result<Option<ExitStatus>, io::Error>,
774    {
775        let mut diagnostics = TerminationDiagnostics::default();
776
777        match try_reap_exit_status(self) {
778            Ok(Some(_)) => {
779                self.must_not_be_terminated();
780                Ok(())
781            }
782            Ok(None) => match send_signal(&self.child) {
783                Ok(()) => Ok(()),
784                // Sync probe only - the SIGCHLD-grace bounded wait lives on the `terminate()`
785                // path. Keeping this sync avoids making the public `send_*_signal` APIs async.
786                Err(signal_error) => match try_reap_exit_status(self) {
787                    Ok(Some(_)) => {
788                        self.must_not_be_terminated();
789                        Ok(())
790                    }
791                    Ok(None) => {
792                        diagnostics.record_graceful_signal_error(phase, signal_name, signal_error);
793                        Err(diagnostics.into_signal_failed(self.name.clone()))
794                    }
795                    Err(reap_error) => {
796                        diagnostics.record_graceful_signal_error(phase, signal_name, signal_error);
797                        diagnostics.record_graceful_status_error(phase, signal_name, reap_error);
798                        Err(diagnostics.into_signal_failed(self.name.clone()))
799                    }
800                },
801            },
802            Err(status_error) => {
803                diagnostics.record_graceful_status_error(phase, signal_name, status_error);
804                Err(diagnostics.into_signal_failed(self.name.clone()))
805            }
806        }
807    }
808
    /// Forwards to `crate::signal::send_interrupt` for this handle's child and returns its
    /// result unchanged.
    ///
    /// "Raw" here means no exit-status probing and no diagnostics recording happen in this
    /// method; callers that need those go through the higher-level send/terminate paths.
    // NOTE(review): presumably delivers `SIGINT` (cf. `INTERRUPT_LABEL`) - confirm in
    // `crate::signal`. The `&mut self` receiver looks chosen to match the `FnMut(&mut Self)`
    // sender shape used by `attempt_graceful_phase` - verify against the callers.
    #[cfg(unix)]
    fn send_interrupt_signal_raw(&mut self) -> Result<(), io::Error> {
        crate::signal::send_interrupt(&self.child)
    }
813
    /// Forwards to `crate::signal::send_terminate` for this handle's child and returns its
    /// result unchanged.
    ///
    /// "Raw" here means no exit-status probing and no diagnostics recording happen in this
    /// method; callers that need those go through the higher-level send/terminate paths.
    // NOTE(review): presumably delivers `SIGTERM` (cf. `TERMINATE_LABEL`) - confirm in
    // `crate::signal`. The `&mut self` receiver looks chosen to match the `FnMut(&mut Self)`
    // sender shape used by `attempt_graceful_phase` - verify against the callers.
    #[cfg(unix)]
    fn send_terminate_signal_raw(&mut self) -> Result<(), io::Error> {
        crate::signal::send_terminate(&self.child)
    }
818
    /// Forwards to `crate::signal::send_ctrl_break` for this handle's child and returns its
    /// result unchanged.
    ///
    /// "Raw" here means no exit-status probing and no diagnostics recording happen in this
    /// method; callers that need those go through the higher-level send/terminate paths.
    // NOTE(review): presumably delivers `CTRL_BREAK_EVENT` (cf. `GRACEFUL_LABEL`) - confirm in
    // `crate::signal`. The `&mut self` receiver looks chosen to match the `FnMut(&mut Self)`
    // sender shape used by `attempt_graceful_phase` - verify against the callers.
    #[cfg(windows)]
    fn send_ctrl_break_signal_raw(&mut self) -> Result<(), io::Error> {
        crate::signal::send_ctrl_break(&self.child)
    }
823
824    fn disarm_after_successful_termination<T>(
825        &mut self,
826        result: Result<T, TerminationError>,
827    ) -> Result<T, TerminationError> {
828        if result.is_ok() {
829            self.must_not_be_terminated();
830        }
831
832        result
833    }
834
835    async fn attempt_graceful_phase<SignalSender>(
836        &mut self,
837        signal_name: &'static str,
838        next_signal_name: &'static str,
839        timeout: Duration,
840        phase: GracefulTerminationPhase,
841        diagnostics: &mut TerminationDiagnostics,
842        send_signal: &mut SignalSender,
843    ) -> Option<TerminationOutcome>
844    where
845        SignalSender: FnMut(&mut Self) -> Result<(), io::Error>,
846    {
847        match send_signal(self) {
848            Ok(()) => {
849                self.wait_after_graceful_signal(
850                    signal_name,
851                    next_signal_name,
852                    timeout,
853                    phase,
854                    diagnostics,
855                )
856                .await
857            }
858            Err(err) => {
859                tracing::warn!(
860                    process = %self.name,
861                    signal = signal_name,
862                    next_signal = next_signal_name,
863                    error = %err,
864                    "Graceful shutdown signal could not be sent. Attempting next shutdown phase."
865                );
866                diagnostics.record_graceful_signal_error(phase, signal_name, err);
867                self.observe_exit_after_failed_signal(signal_name, phase, diagnostics)
868                    .await
869            }
870        }
871    }
872
873    async fn wait_after_graceful_signal(
874        &mut self,
875        signal_name: &'static str,
876        next_signal_name: &'static str,
877        timeout: Duration,
878        phase: GracefulTerminationPhase,
879        diagnostics: &mut TerminationDiagnostics,
880    ) -> Option<TerminationOutcome> {
881        match self.wait_for_exit_after_signal(timeout).await {
882            Ok(Some(exit_status)) => Some(TerminationOutcome::graceful_success(exit_status)),
883            Ok(None) => {
884                let not_terminated = Self::wait_timeout_diagnostic(timeout);
885                tracing::warn!(
886                    process = %self.name,
887                    signal = signal_name,
888                    next_signal = next_signal_name,
889                    error = %not_terminated,
890                    "Graceful shutdown signal timed out. Attempting next shutdown phase."
891                );
892                diagnostics.record_graceful_wait_error(phase, signal_name, not_terminated);
893                None
894            }
895            Err(wait_error) => {
896                tracing::warn!(
897                    process = %self.name,
898                    signal = signal_name,
899                    next_signal = next_signal_name,
900                    error = %wait_error,
901                    "Graceful shutdown signal timed out. Attempting next shutdown phase."
902                );
903                diagnostics.record_graceful_wait_error(phase, signal_name, wait_error);
904                None
905            }
906        }
907    }
908
909    /// Recovery probe after a graceful signal send failed: waits briefly so a freshly-exited
910    /// child is observed as exited rather than as still running. See
911    /// [`REAP_AFTER_SIGNAL_FAILURE_GRACE`].
912    async fn observe_exit_after_failed_signal(
913        &mut self,
914        signal_name: &'static str,
915        phase: GracefulTerminationPhase,
916        diagnostics: &mut TerminationDiagnostics,
917    ) -> Option<TerminationOutcome> {
918        match self
919            .wait_for_exit_after_signal(REAP_AFTER_SIGNAL_FAILURE_GRACE)
920            .await
921        {
922            Ok(Some(exit_status)) => Some(TerminationOutcome::graceful_success(exit_status)),
923            Ok(None) => None,
924            Err(reap_error) => {
925                tracing::warn!(
926                    process = %self.name,
927                    signal = signal_name,
928                    error = %reap_error,
929                    "Could not determine process state after graceful signal send failed."
930                );
931                diagnostics.record_graceful_status_error(phase, signal_name, reap_error);
932                None
933            }
934        }
935    }
936
937    async fn attempt_forceful_kill(
938        &mut self,
939        mut diagnostics: TerminationDiagnostics,
940    ) -> Result<TerminationOutcome, TerminationError> {
941        match Self::start_kill_process_group(&mut self.child) {
942            Ok(()) => {
943                // Note: A forceful kill should typically (somewhat) immediately lead to
944                // termination of the process. But there are cases in which even a forceful kill
945                // does not / cannot / will not kill a process. We do not want to wait indefinitely
946                // in case this happens and therefore wait (at max) for a fixed duration after any
947                // kill.
948                match self
949                    .wait_for_exit_after_signal(FORCE_KILL_WAIT_TIMEOUT)
950                    .await
951                {
952                    Ok(Some(exit_status)) => {
953                        Ok(TerminationOutcome::force_kill_success(exit_status))
954                    }
955                    Ok(None) => {
956                        let not_terminated_after_kill =
957                            Self::wait_timeout_diagnostic(FORCE_KILL_WAIT_TIMEOUT);
958                        // Unlikely. See the note above.
959                        tracing::error!(
960                            process = %self.name,
961                            kill_signal = KILL_LABEL,
962                            "Process did not terminate after all termination attempts. Process may still be running. Manual intervention and investigation required!"
963                        );
964                        diagnostics.record_kill_wait_error(not_terminated_after_kill);
965                        Err(diagnostics.into_termination_failed(self.name.clone()))
966                    }
967                    Err(not_terminated_after_kill) => {
968                        // Unlikely. See the note above.
969                        tracing::error!(
970                            process = %self.name,
971                            kill_signal = KILL_LABEL,
972                            "Process did not terminate after all termination attempts. Process may still be running. Manual intervention and investigation required!"
973                        );
974                        diagnostics.record_kill_wait_error(not_terminated_after_kill);
975                        Err(diagnostics.into_termination_failed(self.name.clone()))
976                    }
977                }
978            }
979            Err(kill_error) => {
980                tracing::error!(
981                    process = %self.name,
982                    error = %kill_error,
983                    signal = KILL_LABEL,
984                    "Forceful shutdown failed. Process may still be running. Manual intervention required!"
985                );
986                diagnostics.record_kill_signal_error(kill_error);
987
988                // Brief grace for Tokio's SIGCHLD reaper to catch up - see
989                // `REAP_AFTER_SIGNAL_FAILURE_GRACE`.
990                match self
991                    .wait_for_exit_after_signal(REAP_AFTER_SIGNAL_FAILURE_GRACE)
992                    .await
993                {
994                    Ok(Some(exit_status)) => {
995                        return Ok(TerminationOutcome::graceful_success(exit_status));
996                    }
997                    Ok(None) => {}
998                    Err(reap_error) => {
999                        tracing::warn!(
1000                            process = %self.name,
1001                            signal = KILL_LABEL,
1002                            error = %reap_error,
1003                            "Could not determine process state after forceful shutdown failed."
1004                        );
1005                        diagnostics.record_kill_status_error(reap_error);
1006                    }
1007                }
1008
1009                Err(diagnostics.into_termination_failed(self.name.clone()))
1010            }
1011        }
1012    }
1013}
1014
1015#[cfg(test)]
1016mod tests;