procref/lib.rs
1//! # procref - Cross-platform Process Reference Counting
2//!
3//! A library for managing shared service lifecycles across multiple processes
4//! using kernel-level reference counting mechanisms.
5//!
6//! ## Platform Support
7//!
8//! | Platform | Mechanism | Auto-cleanup on crash |
9//! |----------|-----------|----------------------|
10//! | Linux | System V Semaphore + SEM_UNDO | ✅ Kernel auto-undo |
11//! | macOS | Mach Port send rights | ✅ Kernel auto-release |
12//! | Windows | Named Semaphore | ✅ Handle auto-close |
13//!
14//! ## Design Philosophy
15//!
16//! This library trusts the **kernel** to manage reference counts, not userspace files.
17//! When a process crashes:
18//! - The kernel automatically decrements the reference count
19//! - No stale state accumulates
20//! - No manual cleanup needed
21//!
22//! Files (like port hints) are just **hints** for optimization, not the source of truth.
23//! The actual process state is always verified against reality.
24//!
25//! ## Lifecycle Callbacks
26//!
27//! The library provides hooks for business logic at key lifecycle points:
28//! - `on_first_acquire`: Called when first client registers (count 0→1)
29//! - `on_last_release`: Called when last client exits (count 1→0)
30//! - `on_health_check`: Called to verify service is healthy
31//! - `on_recover`: Called when service needs recovery
32//!
33//! ## Usage
34//!
35//! ```no_run
36//! use procref::{SharedService, ServiceInfo};
37//!
38//! # async fn example() -> procref::Result<()> {
39//! let service = SharedService::builder("my-database")
40//! .on_first_acquire(|| async {
41//! // Start the database process
42//! let port = 5432;
43//! let pid = start_database(port)?;
44//! Ok(ServiceInfo::new(pid, port))
45//! })
46//! .on_last_release(|info| async move {
47//! // Stop the database when last client exits
48//! procref::process::terminate(info.pid());
49//! Ok(())
50//! })
51//! .build()?;
52//!
53//! // Acquire a reference (starts service if first client)
54//! let handle = service.acquire().await?;
55//! println!("Service running on port {}", handle.info().port());
56//!
//! // Dropping `handle` releases the reference.
//! // If this was the last client, `on_last_release` is called.
59//! # Ok(())
60//! # }
61//! # fn start_database(_: u16) -> procref::Result<u32> { Ok(0) }
62//! ```
63
64#![warn(missing_docs)]
65
66mod error;
67mod platform;
68mod service;
69mod types;
70
71pub use error::{Error, Result};
72pub use platform::PlatformRefCounter;
73pub use service::{ServiceHandle, SharedService, SharedServiceBuilder};
74pub use types::ServiceInfo;
75
76/// Core trait for kernel-level reference counting.
77///
78/// Implementations must guarantee that reference counts are automatically
79/// decremented by the kernel when a process crashes or exits abnormally.
80///
81/// This is NOT a userspace reference count - it relies on OS primitives
82/// that the kernel manages.
83pub trait RefCounter: Send + Sync {
84 /// Acquire a reference (increment count).
85 ///
86 /// Returns the count AFTER incrementing.
87 /// First acquisition (0→1) should trigger on_first_acquire callback.
88 fn acquire(&self) -> Result<u32>;
89
90 /// Release a reference (decrement count).
91 ///
92 /// Returns the count AFTER decrementing.
93 /// Last release (1→0) should trigger on_last_release callback.
94 ///
95 /// Note: This is called for graceful shutdown. On crash, the kernel
96 /// handles the decrement automatically.
97 fn release(&self) -> Result<u32>;
98
99 /// Get current reference count.
100 ///
101 /// This may be slightly stale in multi-process scenarios.
102 /// Use for diagnostics, not critical decisions.
103 fn count(&self) -> Result<u32>;
104
105 /// Try to acquire startup lock (for coordinating service startup).
106 ///
107 /// Returns true if lock acquired, false if another process holds it.
108 /// This is non-blocking.
109 fn try_lock(&self) -> Result<bool>;
110
111 /// Release startup lock.
112 fn unlock(&self) -> Result<()>;
113}
114
115/// Utility functions for process management.
116pub mod process {
117 /// Check if a process with given PID is alive.
118 pub fn is_alive(pid: u32) -> bool {
119 #[cfg(unix)]
120 {
121 unsafe { libc::kill(pid as i32, 0) == 0 }
122 }
123
124 #[cfg(windows)]
125 {
126 use windows_sys::Win32::Foundation::CloseHandle;
127 use windows_sys::Win32::System::Threading::{
128 OpenProcess, PROCESS_QUERY_LIMITED_INFORMATION,
129 };
130 unsafe {
131 let handle = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
132 if handle == 0 {
133 return false;
134 }
135 CloseHandle(handle);
136 true
137 }
138 }
139
140 #[cfg(not(any(unix, windows)))]
141 {
142 let _ = pid;
143 false
144 }
145 }
146
147 /// Send SIGTERM to a process (graceful shutdown).
148 pub fn terminate(pid: u32) {
149 #[cfg(unix)]
150 unsafe {
151 libc::kill(pid as i32, libc::SIGTERM);
152 }
153
154 #[cfg(windows)]
155 {
156 kill(pid);
157 }
158 }
159
160 /// Send SIGKILL to a process (forceful shutdown).
161 pub fn kill(pid: u32) {
162 #[cfg(unix)]
163 unsafe {
164 libc::kill(pid as i32, libc::SIGKILL);
165 }
166
167 #[cfg(windows)]
168 {
169 use windows_sys::Win32::Foundation::CloseHandle;
170 use windows_sys::Win32::System::Threading::{
171 OpenProcess, TerminateProcess, PROCESS_TERMINATE,
172 };
173 unsafe {
174 let handle = OpenProcess(PROCESS_TERMINATE, 0, pid);
175 if handle != 0 {
176 TerminateProcess(handle, 0);
177 CloseHandle(handle);
178 }
179 }
180 }
181 }
182
183 /// Gracefully stop a process: SIGTERM, wait, then SIGKILL if needed.
184 pub fn stop(pid: u32, timeout_ms: u64) {
185 terminate(pid);
186
187 let start = std::time::Instant::now();
188 while start.elapsed().as_millis() < timeout_ms as u128 {
189 if !is_alive(pid) {
190 return;
191 }
192 std::thread::sleep(std::time::Duration::from_millis(50));
193 }
194
195 kill(pid);
196 }
197}