Skip to main content

running_process/broker/backend_lifecycle/
verify_pid.rs

1//! Process identity verification for backend handles.
2
3use std::fs;
4use std::io;
5use std::path::{Path, PathBuf};
6
7use crate::broker::backend_lifecycle::identity::{self, DaemonProcess};
8use crate::broker::host_identity;
9
10/// Verify a daemon process identity and return an OS liveness handle.
11pub fn verify_daemon_process(expected: &DaemonProcess) -> Result<ProcessHandle, VerifyPidError> {
12    if expected.pid == 0 {
13        return Err(VerifyPidError::InvalidPid(expected.pid));
14    }
15
16    let current_boot_id = host_identity::current().boot_id;
17    if !expected.boot_id.is_empty()
18        && !current_boot_id.is_empty()
19        && expected.boot_id != current_boot_id
20    {
21        return Err(VerifyPidError::BootIdMismatch {
22            expected: expected.boot_id.clone(),
23            actual: current_boot_id,
24        });
25    }
26
27    let handle = ProcessHandle::open(expected.pid)?;
28    let exe_path = process_exe_path(expected.pid).map_err(|source| VerifyPidError::ExePath {
29        pid: expected.pid,
30        source,
31    })?;
32    if !same_exe_path(&exe_path, &expected.exe_path) {
33        return Err(VerifyPidError::ExePathMismatch {
34            pid: expected.pid,
35            expected: expected.exe_path.clone(),
36            actual: exe_path,
37        });
38    }
39
40    let actual_sha256 =
41        identity::sha256_file(&exe_path).map_err(|source| VerifyPidError::ExeHash {
42            pid: expected.pid,
43            path: exe_path.clone(),
44            source,
45        })?;
46    if actual_sha256 != expected.exe_sha256 {
47        return Err(VerifyPidError::ExeSha256Mismatch { pid: expected.pid });
48    }
49
50    Ok(handle)
51}
52
53/// Return whether a process ID currently resolves to a live process.
54pub fn process_is_alive(pid: u32) -> bool {
55    ProcessHandle::open(pid)
56        .map(|handle| handle.is_alive())
57        .unwrap_or(false)
58}
59
60/// Send a graceful terminate signal where the platform has one.
61pub fn signal_terminate(pid: u32) -> Result<(), VerifyPidError> {
62    platform_signal_terminate(pid)
63}
64
65/// Force-kill a process ID.
66pub fn force_kill_pid(pid: u32) -> Result<(), VerifyPidError> {
67    platform_force_kill(pid)
68}
69
70/// Errors returned while verifying a daemon process.
71#[derive(Debug, thiserror::Error)]
72pub enum VerifyPidError {
73    /// PID zero or a value outside the native PID range is never valid.
74    #[error("invalid daemon pid: {0}")]
75    InvalidPid(u32),
76    /// The process is not currently alive.
77    #[error("process not found: {pid}")]
78    NotFound {
79        /// Process ID that could not be opened.
80        pid: u32,
81    },
82    /// The manifest was written during a prior host boot.
83    #[error("daemon boot id mismatch: expected {expected}, current {actual}")]
84    BootIdMismatch {
85        /// Boot ID stored with the daemon identity.
86        expected: String,
87        /// Current host boot ID.
88        actual: String,
89    },
90    /// The executable could not be hashed.
91    #[error("failed to hash executable for pid {pid} at {path:?}: {source}")]
92    ExeHash {
93        /// Process ID being verified.
94        pid: u32,
95        /// Executable path selected for hashing.
96        path: PathBuf,
97        /// Underlying I/O error.
98        source: io::Error,
99    },
100    /// The executable path for the process could not be read.
101    #[error("failed to resolve executable path for pid {pid}: {source}")]
102    ExePath {
103        /// Process ID being verified.
104        pid: u32,
105        /// Underlying platform error.
106        source: io::Error,
107    },
108    /// The executable path did not match the manifest identity.
109    #[error(
110        "daemon executable path mismatch for pid {pid}: expected {expected:?}, actual {actual:?}"
111    )]
112    ExePathMismatch {
113        /// Process ID being verified.
114        pid: u32,
115        /// Executable path stored with the daemon identity.
116        expected: PathBuf,
117        /// Executable path reported by the operating system.
118        actual: PathBuf,
119    },
120    /// The executable hash did not match the manifest identity.
121    #[error("daemon executable sha256 mismatch for pid {pid}")]
122    ExeSha256Mismatch {
123        /// Process ID being verified.
124        pid: u32,
125    },
126    /// A platform process-handle operation failed.
127    #[error("process handle operation failed for pid {pid}: {source}")]
128    Handle {
129        /// Process ID being opened or signalled.
130        pid: u32,
131        /// Underlying platform error.
132        source: io::Error,
133    },
134    /// The platform has no graceful shutdown primitive in this foundation.
135    #[error("graceful terminate is unsupported on this platform")]
136    GracefulTerminateUnsupported,
137}
138
139#[cfg(unix)]
140mod platform {
141    use std::io;
142
143    #[cfg(target_os = "macos")]
144    use std::ptr;
145
146    #[cfg(any(target_os = "linux", target_os = "macos"))]
147    use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
148    #[cfg(target_os = "macos")]
149    use std::sync::atomic::{AtomicBool, Ordering};
150
151    use super::VerifyPidError;
152
153    /// Platform liveness handle for a backend process.
154    pub struct ProcessHandle {
155        pid: u32,
156        #[cfg(target_os = "linux")]
157        pid_fd: Option<OwnedFd>,
158        #[cfg(target_os = "macos")]
159        exit_kqueue: OwnedFd,
160        #[cfg(target_os = "macos")]
161        exited: AtomicBool,
162    }
163
164    impl ProcessHandle {
165        pub(crate) fn open(pid: u32) -> Result<Self, VerifyPidError> {
166            validate_pid(pid)?;
167            #[cfg(target_os = "macos")]
168            {
169                Ok(Self {
170                    pid,
171                    exit_kqueue: open_exit_kqueue(pid)?,
172                    exited: AtomicBool::new(false),
173                })
174            }
175
176            #[cfg(target_os = "linux")]
177            {
178                if !process_exists(pid) {
179                    return Err(VerifyPidError::NotFound { pid });
180                }
181                Ok(Self {
182                    pid,
183                    pid_fd: try_pidfd_open(pid)?,
184                })
185            }
186
187            #[cfg(all(not(target_os = "linux"), not(target_os = "macos")))]
188            {
189                if !process_exists(pid) {
190                    return Err(VerifyPidError::NotFound { pid });
191                }
192                Ok(Self { pid })
193            }
194        }
195
196        /// Process ID associated with this handle.
197        pub fn pid(&self) -> u32 {
198            self.pid
199        }
200
201        /// Return whether the process represented by this handle is alive.
202        pub fn is_alive(&self) -> bool {
203            #[cfg(target_os = "linux")]
204            {
205                if let Some(pid_fd) = self.pid_fd.as_ref() {
206                    return pidfd_is_alive(pid_fd);
207                }
208                process_exists(self.pid)
209            }
210
211            #[cfg(target_os = "macos")]
212            {
213                !self.exited.load(Ordering::Relaxed)
214                    && kqueue_process_is_alive(&self.exit_kqueue, &self.exited)
215            }
216
217            #[cfg(all(not(target_os = "linux"), not(target_os = "macos")))]
218            {
219                process_exists(self.pid)
220            }
221        }
222    }
223
224    #[cfg(not(target_os = "macos"))]
225    pub(crate) fn process_exists(pid: u32) -> bool {
226        let Ok(native_pid) = validate_pid(pid) else {
227            return false;
228        };
229        let rc = unsafe { libc::kill(native_pid, 0) };
230        if rc == 0 {
231            return true;
232        }
233        matches!(io::Error::last_os_error().raw_os_error(), Some(libc::EPERM))
234    }
235
236    pub(crate) fn platform_signal_terminate(pid: u32) -> Result<(), VerifyPidError> {
237        let native_pid = validate_pid(pid)?;
238        let rc = unsafe { libc::kill(native_pid, libc::SIGTERM) };
239        if rc == 0 {
240            Ok(())
241        } else {
242            Err(VerifyPidError::Handle {
243                pid,
244                source: io::Error::last_os_error(),
245            })
246        }
247    }
248
249    pub(crate) fn platform_force_kill(pid: u32) -> Result<(), VerifyPidError> {
250        let native_pid = validate_pid(pid)?;
251        let rc = unsafe { libc::kill(native_pid, libc::SIGKILL) };
252        if rc == 0 {
253            Ok(())
254        } else {
255            Err(VerifyPidError::Handle {
256                pid,
257                source: io::Error::last_os_error(),
258            })
259        }
260    }
261
262    fn validate_pid(pid: u32) -> Result<libc::pid_t, VerifyPidError> {
263        if pid == 0 || pid > libc::pid_t::MAX as u32 {
264            Err(VerifyPidError::InvalidPid(pid))
265        } else {
266            Ok(pid as libc::pid_t)
267        }
268    }
269
270    #[cfg(target_os = "macos")]
271    fn open_exit_kqueue(pid: u32) -> Result<OwnedFd, VerifyPidError> {
272        let native_pid = validate_pid(pid)?;
273        let raw_fd = unsafe { libc::kqueue() };
274        if raw_fd < 0 {
275            return Err(VerifyPidError::Handle {
276                pid,
277                source: io::Error::last_os_error(),
278            });
279        }
280
281        let kqueue_fd = unsafe { OwnedFd::from_raw_fd(raw_fd) };
282        let change = libc::kevent {
283            ident: native_pid as libc::uintptr_t,
284            filter: libc::EVFILT_PROC,
285            flags: libc::EV_ADD | libc::EV_CLEAR,
286            fflags: libc::NOTE_EXIT,
287            data: 0,
288            udata: ptr::null_mut(),
289        };
290        let rc = unsafe {
291            libc::kevent(
292                kqueue_fd.as_raw_fd(),
293                &change,
294                1,
295                ptr::null_mut(),
296                0,
297                ptr::null(),
298            )
299        };
300        if rc == 0 {
301            return Ok(kqueue_fd);
302        }
303
304        let source = io::Error::last_os_error();
305        if matches!(source.raw_os_error(), Some(libc::ESRCH)) {
306            Err(VerifyPidError::NotFound { pid })
307        } else {
308            Err(VerifyPidError::Handle { pid, source })
309        }
310    }
311
312    #[cfg(target_os = "macos")]
313    fn kqueue_process_is_alive(kqueue_fd: &OwnedFd, exited: &AtomicBool) -> bool {
314        let mut event = std::mem::MaybeUninit::<libc::kevent>::uninit();
315        let timeout = libc::timespec {
316            tv_sec: 0,
317            tv_nsec: 0,
318        };
319        let rc = unsafe {
320            libc::kevent(
321                kqueue_fd.as_raw_fd(),
322                ptr::null(),
323                0,
324                event.as_mut_ptr(),
325                1,
326                &timeout,
327            )
328        };
329        if rc == 0 {
330            return true;
331        }
332
333        exited.store(true, Ordering::Relaxed);
334        false
335    }
336
337    #[cfg(target_os = "linux")]
338    fn try_pidfd_open(pid: u32) -> Result<Option<OwnedFd>, VerifyPidError> {
339        let raw = unsafe { libc::syscall(libc::SYS_pidfd_open, pid as libc::pid_t, 0_u32) };
340        if raw >= 0 {
341            let fd = unsafe { OwnedFd::from_raw_fd(raw as i32) };
342            return Ok(Some(fd));
343        }
344
345        let err = io::Error::last_os_error();
346        match err.raw_os_error() {
347            Some(libc::ENOSYS | libc::EINVAL | libc::EPERM) => Ok(None),
348            Some(libc::ESRCH) => Err(VerifyPidError::NotFound { pid }),
349            _ => Ok(None),
350        }
351    }
352
353    #[cfg(target_os = "linux")]
354    fn pidfd_is_alive(pid_fd: &OwnedFd) -> bool {
355        let mut poll_fd = libc::pollfd {
356            fd: pid_fd.as_raw_fd(),
357            events: libc::POLLIN,
358            revents: 0,
359        };
360        let rc = unsafe { libc::poll(&mut poll_fd, 1, 0) };
361        rc == 0
362    }
363}
364
365#[cfg(windows)]
366mod platform {
367    use std::io;
368
369    use windows_sys::Win32::Foundation::{CloseHandle, HANDLE};
370    use windows_sys::Win32::System::Threading::{
371        GetExitCodeProcess, OpenProcess, TerminateProcess, PROCESS_QUERY_LIMITED_INFORMATION,
372        PROCESS_TERMINATE,
373    };
374
375    use super::VerifyPidError;
376
377    const STILL_ACTIVE: u32 = 259;
378
379    /// Platform liveness handle for a backend process.
380    pub struct ProcessHandle {
381        pid: u32,
382        handle: HANDLE,
383    }
384
385    impl ProcessHandle {
386        pub(crate) fn open(pid: u32) -> Result<Self, VerifyPidError> {
387            let handle = unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) };
388            if handle.is_null() {
389                return Err(VerifyPidError::NotFound { pid });
390            }
391            Ok(Self { pid, handle })
392        }
393
394        /// Process ID associated with this handle.
395        pub fn pid(&self) -> u32 {
396            self.pid
397        }
398
399        /// Return whether the process represented by this handle is alive.
400        pub fn is_alive(&self) -> bool {
401            let mut exit_code = 0_u32;
402            let ok = unsafe { GetExitCodeProcess(self.handle, &mut exit_code) };
403            ok != 0 && exit_code == STILL_ACTIVE
404        }
405    }
406
407    impl Drop for ProcessHandle {
408        fn drop(&mut self) {
409            unsafe {
410                CloseHandle(self.handle);
411            }
412        }
413    }
414
415    pub(crate) fn platform_signal_terminate(_pid: u32) -> Result<(), VerifyPidError> {
416        Err(VerifyPidError::GracefulTerminateUnsupported)
417    }
418
419    pub(crate) fn platform_force_kill(pid: u32) -> Result<(), VerifyPidError> {
420        let handle = unsafe { OpenProcess(PROCESS_TERMINATE, 0, pid) };
421        if handle.is_null() {
422            return Err(VerifyPidError::NotFound { pid });
423        }
424        let ok = unsafe { TerminateProcess(handle, 1) };
425        let source = io::Error::last_os_error();
426        unsafe {
427            CloseHandle(handle);
428        }
429        if ok == 0 {
430            Err(VerifyPidError::Handle { pid, source })
431        } else {
432            Ok(())
433        }
434    }
435}
436
437pub use platform::ProcessHandle;
438use platform::{platform_force_kill, platform_signal_terminate};
439
/// Ask the operating system for the executable path of process `pid`.
///
/// * Linux: reads the `/proc/{pid}/exe` symlink.
/// * Windows: opens the process and calls `QueryFullProcessImageNameW`.
/// * Other platforms: looks the process up through `sysinfo`.
///
/// # Errors
///
/// Returns the underlying OS error on Linux and Windows, and a `NotFound` error on other
/// platforms when the process or its executable path is unavailable.
fn process_exe_path(pid: u32) -> Result<PathBuf, io::Error> {
    #[cfg(target_os = "linux")]
    {
        let link = format!("/proc/{pid}/exe");
        fs::read_link(link)
    }

    #[cfg(target_os = "windows")]
    {
        use windows_sys::Win32::Foundation::CloseHandle;
        use windows_sys::Win32::System::Threading::{
            OpenProcess, QueryFullProcessImageNameW, PROCESS_QUERY_LIMITED_INFORMATION,
        };

        // SAFETY: `OpenProcess` takes plain integers and returns a handle or null.
        let process = unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) };
        if process.is_null() {
            return Err(io::Error::last_os_error());
        }

        let mut buffer = vec![0_u16; 32768];
        let mut written = buffer.len() as u32;
        // SAFETY: `buffer` holds `written` UTF-16 units and `process` is a valid handle.
        let ok =
            unsafe { QueryFullProcessImageNameW(process, 0, buffer.as_mut_ptr(), &mut written) };
        // Capture the error before `CloseHandle` can overwrite the last-error value.
        let source = io::Error::last_os_error();
        // SAFETY: `process` is still open and is closed exactly once.
        unsafe {
            CloseHandle(process);
        }
        if ok == 0 {
            return Err(source);
        }

        // On success `written` holds the number of UTF-16 units stored in the buffer.
        buffer.truncate(written as usize);
        Ok(PathBuf::from(String::from_utf16_lossy(&buffer)))
    }

    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    {
        let mut system = sysinfo::System::new_all();
        system.refresh_processes();
        system
            .process(sysinfo::Pid::from_u32(pid))
            .and_then(|process| process.exe())
            .map(|exe| exe.to_path_buf())
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::NotFound,
                    "process executable path not found",
                )
            })
    }
}
488
/// Decide whether two executable paths name the same file.
///
/// Each path is canonicalized first; a path that cannot be canonicalized is compared as
/// given. On Windows the comparison goes through `comparable_windows_path`; elsewhere the
/// two paths are compared for equality.
fn same_exe_path(actual: &Path, expected: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (actual, expected) = (resolve(actual), resolve(expected));

    #[cfg(windows)]
    {
        comparable_windows_path(&actual) == comparable_windows_path(&expected)
    }

    #[cfg(not(windows))]
    {
        actual == expected
    }
}
503
/// Reduce a Windows path to a form suitable for equality comparison.
///
/// Backslashes become forward slashes, a leading `//?/` prefix is removed, and ASCII
/// letters are lowercased.
#[cfg(windows)]
fn comparable_windows_path(path: &Path) -> String {
    let forward_slashed = path.to_string_lossy().replace('\\', "/");
    match forward_slashed.strip_prefix("//?/") {
        Some(unprefixed) => unprefixed.to_ascii_lowercase(),
        None => forward_slashed.to_ascii_lowercase(),
    }
}