Skip to main content

running_process/broker/lifecycle/
process_tree.rs

1//! Process-tree cleanup setup for the broker.
2//!
3//! The broker can launch backend processes. Installing cleanup before
4//! argument dispatch ensures later serve modes inherit the same
5//! parent-death / kill-on-close containment behavior from process start.
6
7use std::{io, time::Duration};
8
/// Which process-tree cleanup mechanism was installed for the current broker
/// process, or which concrete lifecycle contract applies to it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProcessTreeCleanup {
    /// The Linux `PR_SET_PDEATHSIG` parent-death signal was installed.
    LinuxParentDeathSignal,
    /// The broker was contained in a Windows kill-on-job-close Job Object.
    WindowsKillOnJobClose,
    /// Windows reported that the broker already belongs to a Job Object.
    WindowsAlreadyInJob,
    /// macOS relies on the Phase 5 kqueue-supervisor contract.
    MacosKqueueSupervisorContract,
    /// No broker process-tree primitive exists for this platform yet.
    UnsupportedNoop,
}
24
/// Upper bound (five seconds) on Phase 5 cleanup of a macOS backend once the
/// broker has exited.
pub const MACOS_SUPERVISOR_KILL_DEADLINE: Duration = Duration::from_secs(5);
27
28/// Concrete macOS supervisor contract for Phase 5 process-tree cleanup.
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub struct MacosSupervisorContract {
31    /// PID that the supervisor child watches.
32    pub watch_pid: MacosSupervisorWatchPid,
33    /// kqueue filter registered by the supervisor.
34    pub kqueue_filter: MacosKqueueFilter,
35    /// kqueue note that reports broker exit.
36    pub kqueue_note: MacosKqueueNote,
37    /// Startup barrier before the backend endpoint can be published.
38    pub registration_barrier: MacosSupervisorRegistrationBarrier,
39    /// Race guard after kqueue registration.
40    pub race_guard: MacosSupervisorRaceGuard,
41    /// Action the supervisor performs after observing broker exit.
42    pub exit_action: MacosSupervisorExitAction,
43    /// Required cleanup deadline after broker exit.
44    pub kill_deadline: Duration,
45}
46
47impl MacosSupervisorContract {
48    /// Return the Phase 5 macOS supervisor contract.
49    pub const fn phase5() -> Self {
50        Self {
51            watch_pid: MacosSupervisorWatchPid::BrokerParent,
52            kqueue_filter: MacosKqueueFilter::Process,
53            kqueue_note: MacosKqueueNote::Exit,
54            registration_barrier: MacosSupervisorRegistrationBarrier::BeforeBackendPipePublication,
55            race_guard: MacosSupervisorRaceGuard::RecheckBrokerAliveAfterRegistration,
56            exit_action: MacosSupervisorExitAction::SigkillBackend,
57            kill_deadline: MACOS_SUPERVISOR_KILL_DEADLINE,
58        }
59    }
60
61    /// Return the kqueue filter syscall name.
62    pub const fn kqueue_filter_name(&self) -> &'static str {
63        match self.kqueue_filter {
64            MacosKqueueFilter::Process => "EVFILT_PROC",
65        }
66    }
67
68    /// Return the kqueue note syscall name.
69    pub const fn kqueue_note_name(&self) -> &'static str {
70        match self.kqueue_note {
71            MacosKqueueNote::Exit => "NOTE_EXIT",
72        }
73    }
74
75    /// Return the supervisor termination signal name.
76    pub const fn termination_signal_name(&self) -> &'static str {
77        match self.exit_action {
78            MacosSupervisorExitAction::SigkillBackend => "SIGKILL",
79        }
80    }
81}
82
/// Identifies the process whose PID the macOS supervisor child watches.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosSupervisorWatchPid {
    /// The broker process, which is the supervisor's parent.
    BrokerParent,
}
89
/// The kqueue filter that the macOS supervisor child registers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosKqueueFilter {
    /// The process filter, `EVFILT_PROC`.
    Process,
}
96
/// The kqueue process note that the macOS supervisor child listens for.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosKqueueNote {
    /// The process-exit note, `NOTE_EXIT`.
    Exit,
}
103
/// Startup barrier that the macOS supervisor child is required to honor.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosSupervisorRegistrationBarrier {
    /// kqueue registration must happen before the backend pipe is published.
    BeforeBackendPipePublication,
}
110
/// Startup race guard that the macOS supervisor child is required to apply.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosSupervisorRaceGuard {
    /// After kqueue registration, check again that the broker is still alive.
    RecheckBrokerAliveAfterRegistration,
}
117
/// What the macOS supervisor child does once the broker has exited.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacosSupervisorExitAction {
    /// Deliver `SIGKILL` to the backend process.
    SigkillBackend,
}
124
125/// Return the concrete macOS kqueue-supervisor contract for Phase 5.
126pub const fn macos_supervisor_contract() -> MacosSupervisorContract {
127    MacosSupervisorContract::phase5()
128}
129
130/// Errors returned while installing process-tree cleanup.
131#[derive(Debug, thiserror::Error)]
132pub enum ProcessTreeError {
133    /// Linux `prctl(PR_SET_PDEATHSIG, ...)` failed.
134    #[error("failed to install Linux parent-death signal: {0}")]
135    LinuxParentDeathSignal(io::Error),
136    /// Windows could not create or configure a kill-on-close job.
137    #[error("failed to create Windows kill-on-close Job Object: {0}")]
138    WindowsJobCreate(io::Error),
139    /// Windows could not assign the broker process to the job.
140    #[error("failed to assign broker process to Windows Job Object: {0}")]
141    WindowsJobAssign(io::Error),
142}
143
144/// Install process-tree cleanup for the current broker process.
145///
146/// On Linux this sets `PR_SET_PDEATHSIG` to `SIGTERM`. On Windows this assigns
147/// the broker to a kill-on-close Job Object unless it already belongs to one.
148/// On macOS this selects
149/// [`ProcessTreeCleanup::MacosKqueueSupervisorContract`] and the concrete
150/// [`MacosSupervisorContract`] that backend spawn wiring must honor before
151/// publishing a backend pipe.
152/// Other platforms currently return
153/// [`ProcessTreeCleanup::UnsupportedNoop`].
154pub fn install_cleanup() -> Result<ProcessTreeCleanup, ProcessTreeError> {
155    platform_install_cleanup()
156}
157
158/// Return the cleanup mechanism this platform attempts to install.
159pub fn cleanup_target() -> ProcessTreeCleanup {
160    cleanup_target_for_platform(current_platform())
161}
162
/// Platform families that the cleanup table distinguishes.
///
/// Outside of tests only the variant for the compilation target exists. The
/// extra `test` condition on each variant makes all of them available to unit
/// tests on any host.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CleanupPlatform {
    /// Linux targets.
    #[cfg(any(target_os = "linux", test))]
    Linux,
    /// Windows targets.
    #[cfg(any(windows, test))]
    Windows,
    /// macOS targets.
    #[cfg(any(target_os = "macos", test))]
    Macos,
    /// Every remaining target: other Unix systems and non-Unix, non-Windows
    /// systems.
    #[cfg(any(
        all(unix, not(any(target_os = "linux", target_os = "macos"))),
        all(not(unix), not(windows)),
        test
    ))]
    Other,
}
178
179fn cleanup_target_for_platform(platform: CleanupPlatform) -> ProcessTreeCleanup {
180    match platform {
181        #[cfg(any(target_os = "linux", test))]
182        CleanupPlatform::Linux => ProcessTreeCleanup::LinuxParentDeathSignal,
183        #[cfg(any(windows, test))]
184        CleanupPlatform::Windows => ProcessTreeCleanup::WindowsKillOnJobClose,
185        #[cfg(any(target_os = "macos", test))]
186        CleanupPlatform::Macos => ProcessTreeCleanup::MacosKqueueSupervisorContract,
187        #[cfg(any(
188            all(unix, not(any(target_os = "linux", target_os = "macos"))),
189            all(not(unix), not(windows)),
190            test
191        ))]
192        CleanupPlatform::Other => ProcessTreeCleanup::UnsupportedNoop,
193    }
194}
195
196#[cfg(target_os = "linux")]
197fn current_platform() -> CleanupPlatform {
198    CleanupPlatform::Linux
199}
200
201#[cfg(windows)]
202fn current_platform() -> CleanupPlatform {
203    CleanupPlatform::Windows
204}
205
206#[cfg(target_os = "macos")]
207fn current_platform() -> CleanupPlatform {
208    CleanupPlatform::Macos
209}
210
211#[cfg(all(unix, not(any(target_os = "linux", target_os = "macos"))))]
212fn current_platform() -> CleanupPlatform {
213    CleanupPlatform::Other
214}
215
216#[cfg(all(not(unix), not(windows)))]
217fn current_platform() -> CleanupPlatform {
218    CleanupPlatform::Other
219}
220
221#[cfg(target_os = "linux")]
222fn platform_install_cleanup() -> Result<ProcessTreeCleanup, ProcessTreeError> {
223    let rc = unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, linux_parent_death_signal()) };
224    if rc == -1 {
225        Err(ProcessTreeError::LinuxParentDeathSignal(
226            io::Error::last_os_error(),
227        ))
228    } else {
229        Ok(ProcessTreeCleanup::LinuxParentDeathSignal)
230    }
231}
232
233#[cfg(target_os = "linux")]
234fn linux_parent_death_signal() -> libc::c_int {
235    libc::SIGTERM
236}
237
/// Contain the broker in a kill-on-close Job Object (Windows build).
///
/// Outcomes:
///
/// - `WindowsKillOnJobClose` when a job is already recorded in `JOB_HANDLE`,
///   or when a new job was created, assigned and recorded.
/// - `WindowsAlreadyInJob` when assignment fails with access-denied, or when
///   another caller recorded its job first.
///
/// # Errors
///
/// Propagates [`ProcessTreeError::WindowsJobCreate`] from job creation and
/// returns [`ProcessTreeError::WindowsJobAssign`] for any assignment failure
/// other than access-denied.
#[cfg(windows)]
fn platform_install_cleanup() -> Result<ProcessTreeCleanup, ProcessTreeError> {
    // A recorded handle means an earlier call already installed containment.
    if JOB_HANDLE.get().is_some() {
        return Ok(ProcessTreeCleanup::WindowsKillOnJobClose);
    }

    let job = create_kill_on_close_job()?;

    if let Err(source) = assign_current_process_to_job(job.as_raw()) {
        // The broker never joined this job, so dropping `job` on return only
        // closes an empty job object.
        return if windows_error_is_access_denied(&source) {
            Ok(ProcessTreeCleanup::WindowsAlreadyInJob)
        } else {
            Err(ProcessTreeError::WindowsJobAssign(source))
        };
    }

    match JOB_HANDLE.set(job) {
        Ok(()) => Ok(ProcessTreeCleanup::WindowsKillOnJobClose),
        Err(duplicate) => {
            // Avoid closing a job handle that may contain the current
            // process. Leaking the duplicate setup handle is preferable to
            // terminating the broker in an impossible double-install race.
            std::mem::forget(duplicate);
            Ok(ProcessTreeCleanup::WindowsAlreadyInJob)
        }
    }
}
263
/// Select the kqueue-supervisor contract (macOS build).
///
/// Nothing is installed here; the function only reports the contract and
/// never fails.
#[cfg(target_os = "macos")]
fn platform_install_cleanup() -> Result<ProcessTreeCleanup, ProcessTreeError> {
    Ok(ProcessTreeCleanup::MacosKqueueSupervisorContract)
}
268
/// Fallback for targets without a process-tree primitive: other Unix systems
/// and targets that are neither Unix nor Windows.
///
/// Nothing is installed; the function always reports `UnsupportedNoop`.
#[cfg(any(
    all(unix, not(any(target_os = "linux", target_os = "macos"))),
    all(not(unix), not(windows))
))]
fn platform_install_cleanup() -> Result<ProcessTreeCleanup, ProcessTreeError> {
    Ok(ProcessTreeCleanup::UnsupportedNoop)
}
278
/// Job handle recorded after the broker has been assigned to its
/// kill-on-close job. A populated cell also tells later installs that
/// containment is already in place.
#[cfg(windows)]
static JOB_HANDLE: std::sync::OnceLock<WindowsJobHandle> = std::sync::OnceLock::new();
281
/// Owning wrapper around a Windows Job Object handle.
///
/// The handle is stored as a `usize` rather than a raw pointer, presumably so
/// the wrapper can live in the `JOB_HANDLE` static (TODO confirm).
#[cfg(windows)]
struct WindowsJobHandle(usize);

#[cfg(windows)]
impl WindowsJobHandle {
    /// View the stored value as the `HANDLE` that the Win32 APIs expect.
    fn as_raw(&self) -> winapi::um::winnt::HANDLE {
        let WindowsJobHandle(bits) = *self;
        bits as winapi::um::winnt::HANDLE
    }
}

#[cfg(windows)]
impl Drop for WindowsJobHandle {
    /// Close the job handle; the result of `CloseHandle` is ignored.
    fn drop(&mut self) {
        let handle = self.as_raw();
        // SAFETY: the wrapper is only built from a handle returned by a
        // successful `CreateJobObjectW`, and `drop` runs at most once.
        unsafe {
            winapi::um::handleapi::CloseHandle(handle);
        }
    }
}
300
/// Create an anonymous Job Object configured with
/// `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK`.
///
/// # Errors
///
/// Returns [`ProcessTreeError::WindowsJobCreate`] with the OS error when the
/// job cannot be created or its limits cannot be applied. On the second
/// failure the freshly created handle is closed before returning.
#[cfg(windows)]
fn create_kill_on_close_job() -> Result<WindowsJobHandle, ProcessTreeError> {
    use winapi::shared::minwindef::FALSE;
    use winapi::um::handleapi::INVALID_HANDLE_VALUE;
    use winapi::um::jobapi2::{CreateJobObjectW, SetInformationJobObject};
    use winapi::um::winnt::{
        JobObjectExtendedLimitInformation, JOBOBJECT_EXTENDED_LIMIT_INFORMATION,
        JOB_OBJECT_LIMIT_BREAKAWAY_OK, JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
    };

    // SAFETY: both arguments are null, requesting an unnamed job with default
    // security attributes; no memory is handed to the call.
    let raw = unsafe { CreateJobObjectW(std::ptr::null_mut(), std::ptr::null()) };
    if raw.is_null() || raw == INVALID_HANDLE_VALUE {
        return Err(ProcessTreeError::WindowsJobCreate(
            io::Error::last_os_error(),
        ));
    }

    // From here on the wrapper owns the handle, so every early return closes
    // it through `Drop`.
    let job = WindowsJobHandle(raw as usize);

    // SAFETY: the limit-information struct is plain C data for which an
    // all-zero bit pattern is a valid value.
    let mut limits: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = unsafe { std::mem::zeroed() };
    limits.BasicLimitInformation.LimitFlags =
        JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
    let limits_len = std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32;

    // SAFETY: `limits` outlives the call, and the pointer and length describe
    // exactly that struct.
    let configured = unsafe {
        SetInformationJobObject(
            job.as_raw(),
            JobObjectExtendedLimitInformation,
            (&mut limits as *mut JOBOBJECT_EXTENDED_LIMIT_INFORMATION).cast(),
            limits_len,
        )
    };
    if configured == FALSE {
        // Read the OS error before `job` is dropped; closing the handle could
        // overwrite the thread's last-error value.
        let err = io::Error::last_os_error();
        return Err(ProcessTreeError::WindowsJobCreate(err));
    }

    Ok(job)
}
337
/// Assign the calling process to the Job Object identified by `job`.
///
/// # Errors
///
/// Returns the thread's last OS error when `AssignProcessToJobObject` reports
/// failure.
#[cfg(windows)]
fn assign_current_process_to_job(job: winapi::um::winnt::HANDLE) -> Result<(), io::Error> {
    use winapi::shared::minwindef::FALSE;
    use winapi::um::jobapi2::AssignProcessToJobObject;
    use winapi::um::processthreadsapi::GetCurrentProcess;

    // SAFETY: `GetCurrentProcess` yields the current-process pseudo handle;
    // `job` is supplied by the caller and assumed to be a live job handle.
    let assigned = unsafe { AssignProcessToJobObject(job, GetCurrentProcess()) };
    if assigned == FALSE {
        return Err(io::Error::last_os_error());
    }

    Ok(())
}
351
/// Whether `err` carries the raw OS code `ERROR_ACCESS_DENIED`.
///
/// Errors without a raw OS code never match.
#[cfg(windows)]
fn windows_error_is_access_denied(err: &io::Error) -> bool {
    use winapi::shared::winerror::ERROR_ACCESS_DENIED;

    matches!(err.raw_os_error(), Some(code) if code == ERROR_ACCESS_DENIED as i32)
}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// The platform table must map every platform family to its Phase 5
    /// cleanup mechanism.
    #[test]
    fn cleanup_target_model_states_phase_5_platform_contracts() {
        let expectations = [
            (CleanupPlatform::Linux, ProcessTreeCleanup::LinuxParentDeathSignal),
            (CleanupPlatform::Windows, ProcessTreeCleanup::WindowsKillOnJobClose),
            (CleanupPlatform::Macos, ProcessTreeCleanup::MacosKqueueSupervisorContract),
            (CleanupPlatform::Other, ProcessTreeCleanup::UnsupportedNoop),
        ];

        for (platform, expected) in expectations {
            assert_eq!(cleanup_target_for_platform(platform), expected);
        }
    }

    /// `cleanup_target` must report the mechanism of the host platform.
    #[test]
    fn cleanup_target_is_explicit_for_current_platform() {
        let actual = cleanup_target();

        #[cfg(target_os = "linux")]
        assert_eq!(actual, ProcessTreeCleanup::LinuxParentDeathSignal);

        #[cfg(windows)]
        assert_eq!(actual, ProcessTreeCleanup::WindowsKillOnJobClose);

        #[cfg(target_os = "macos")]
        assert_eq!(actual, ProcessTreeCleanup::MacosKqueueSupervisorContract);

        #[cfg(all(not(any(target_os = "linux", target_os = "macos")), not(windows)))]
        assert_eq!(actual, ProcessTreeCleanup::UnsupportedNoop);
    }

    /// The Linux parent-death signal is pinned to `SIGTERM`.
    #[cfg(target_os = "linux")]
    #[test]
    fn linux_parent_death_signal_is_sigterm() {
        assert_eq!(linux_parent_death_signal(), libc::SIGTERM);
    }

    /// The macOS supervisor contract must keep every Phase 5 requirement.
    #[test]
    fn macos_supervisor_contract_pins_phase_5_cleanup_requirements() {
        let contract = macos_supervisor_contract();

        // What is watched and how the exit is observed.
        assert_eq!(contract.watch_pid, MacosSupervisorWatchPid::BrokerParent);
        assert_eq!(contract.kqueue_filter_name(), "EVFILT_PROC");
        assert_eq!(contract.kqueue_note_name(), "NOTE_EXIT");

        // Startup ordering guarantees.
        assert_eq!(
            contract.registration_barrier,
            MacosSupervisorRegistrationBarrier::BeforeBackendPipePublication
        );
        assert_eq!(
            contract.race_guard,
            MacosSupervisorRaceGuard::RecheckBrokerAliveAfterRegistration
        );

        // Termination behavior and budget.
        assert_eq!(contract.termination_signal_name(), "SIGKILL");
        assert_eq!(contract.kill_deadline, Duration::from_secs(5));
    }
}