running_process/broker/brokered_backend.rs
1//! Structurally-enforced fast-bind contract for v2 brokered daemons (#497).
2//!
3//! ## Why
4//!
5//! The v1 launcher (`BackendLauncher::probe_with_service`) requires the
6//! spawned daemon to answer an IPC probe within
7//! `DEFAULT_ENDPOINT_PROBE_TIMEOUT` (~500 ms) after spawn. That budget is
8//! hard-coded in the launcher; nothing in the type system prevents a
9//! daemon implementer from doing 3 s of state loading inside its
10//! bootstrap before the IPC endpoint becomes probe-able. zccache#640
11//! and zccache#784 fix this consumer-side; #497 lifts the invariant
12//! into a broker-owned contract so future brokered services (fbuild
13//! daemon, soldr cache-daemon) cannot silently regress.
14//!
15//! ## Shape (Option A from #497)
16//!
17//! ```text
//!   bind(&endpoint) -> IpcListener     // SYNC, microseconds, no state access
//!           |
//!           v
//!   write_lock_file                    // broker-orchestrated
//!           |
//!           v
//!   serve(listener) -> !               // free to spawn_blocking, take 30s warming
25//! ```
26//!
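//! `bind` receives only the endpoint — no `self`, no `State` — so the
//! trait hands the daemon no state to load before the endpoint is up.
//! The signature cannot forbid ad-hoc I/O inside `bind`; that remains a
//! documented rule, backed by the broker's probe timeout. What the
//! trait shape does make a compile-time property is that state
//! initialization belongs to `serve`, after the endpoint is bound.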
30//!
31//! Option B (broker-owned bind via inherited file descriptors / named-
32//! pipe handles) is a strictly stronger refinement deferred to a
33//! follow-up; this slice lands Option A as the minimum viable trait
34//! shape so downstream conformance tests + daemon migrations have a
35//! stable target.
36
37use std::error::Error;
38use std::fmt;
39
/// Failure modes of a [`BrokeredBackend::bind`] call.
///
/// The set is intentionally small. The broker reacts the same way to
/// every variant (the spawn is declared dead and the error is surfaced
/// to the operator); the distinctions exist so daemons can log
/// something meaningful without defining an error type of their own.
#[derive(Debug)]
pub enum BindError {
    /// The endpoint string is not a valid path / pipe name on this
    /// platform. Carries a description of the offending endpoint.
    InvalidEndpoint(String),

    /// The endpoint is already held by another process (`EADDRINUSE`,
    /// `ERROR_PIPE_BUSY`, and similar).
    AlreadyBound(String),

    /// The bind syscall failed with an OS-level error.
    Io(std::io::Error),

    /// Any daemon-specific bind failure that fits none of the above
    /// (for example a custom security policy denying permission).
    Other(String),
}
62
63impl fmt::Display for BindError {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 match self {
66 Self::InvalidEndpoint(s) => write!(f, "invalid endpoint: {s}"),
67 Self::AlreadyBound(s) => write!(f, "endpoint already bound: {s}"),
68 Self::Io(e) => write!(f, "bind io error: {e}"),
69 Self::Other(s) => write!(f, "bind error: {s}"),
70 }
71 }
72}
73
74impl Error for BindError {
75 fn source(&self) -> Option<&(dyn Error + 'static)> {
76 match self {
77 Self::Io(e) => Some(e),
78 _ => None,
79 }
80 }
81}
82
83impl From<std::io::Error> for BindError {
84 fn from(e: std::io::Error) -> Self {
85 Self::Io(e)
86 }
87}
88
/// Uninhabited type used as the return type of [`BrokeredBackend::serve`]
/// and inside the `Result` returned by [`bootstrap`] to express "this
/// never produces a value" on stable Rust.
///
/// The bare never type `!` is stable only as a function's direct return
/// type (`fn f() -> !`). Using it as an ordinary type — for example as
/// the `Ok` parameter in `Result<!, BindError>` — still requires the
/// unstable `never_type` feature. An empty enum is equally uninhabited
/// and can be named in any type position on stable.
///
/// Safe code cannot construct a `Never`, so the only way to satisfy a
/// signature that returns one is to diverge (loop, panic, exit). A
/// caller holding a `Never` can discharge it with `match never {}`.
#[derive(Debug)]
pub enum Never {}
99
100/// Cross-platform `IpcListener` placeholder.
101///
102/// Until the v2 broker baseline lands a unified listener wrapper
103/// (subsequent slice of #488), this type alias points at the existing
104/// `interprocess::local_socket::Listener` so daemons can start
105/// implementing [`BrokeredBackend`] today. A typedef shift later
106/// won't break implementers since they construct the listener via
107/// platform-neutral helpers, not by naming the type.
108pub type IpcListener = interprocess::local_socket::Listener;
109
/// Endpoint identifier the broker passes to a daemon's [`bind`] method.
///
/// Currently an alias for `str`, so `&Endpoint` is just `&str`; this
/// matches the shape of `ServiceDefinition`'s endpoint field. It is
/// expected to gain structure as the v2 broker baseline grows.
///
/// [`bind`]: BrokeredBackend::bind
pub type Endpoint = str;
116
117/// The fast-bind contract a v2 brokered daemon implements.
118///
119/// Trait method ordering matches the orchestration the broker runs
120/// inside [`bootstrap`]:
121///
122/// 1. [`bind`](BrokeredBackend::bind) — claim the kernel resource.
123/// Synchronous, expected to complete in microseconds. **Takes only
124/// the endpoint** — no `&mut self`, no associated-`State` parameter
125/// — so it is structurally impossible to perform daemon-state
126/// initialization before this returns.
127/// 2. (broker-orchestrated) write the lockfile + report spawn success
128/// to the operator.
129/// 3. [`serve`](BrokeredBackend::serve) — accept connections forever.
130/// Free to `spawn_blocking` for arbitrarily slow state loads; the
131/// broker does not observe this. Clients connecting during the
132/// warmup window queue in the OS accept backlog or see whatever
133/// cold-path semantics the daemon defines.
134pub trait BrokeredBackend {
135 /// Daemon-specific state that survives between requests.
136 ///
137 /// Allocated and consumed inside [`serve`] — never visible to
138 /// [`bind`]. The trait's structural guarantee is precisely that
139 /// state initialization cannot run before the endpoint is bound.
140 type State: Send + 'static;
141
142 /// Bind the IPC listener. **No state access, no disk I/O.**
143 ///
144 /// The broker enforces hang detection by failing the spawn if this
145 /// does not return (or if the resulting listener is not probe-able)
146 /// within `DEFAULT_ENDPOINT_PROBE_TIMEOUT`.
147 fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError>;
148
149 /// Serve forever on the bound listener.
150 ///
151 /// Free to initialize `State` synchronously, `spawn_blocking` heavy
152 /// loads, or anything else — the broker has already declared the
153 /// daemon "spawned successfully" by this point.
154 ///
155 /// Returns `!` because a brokered daemon's normal control flow is
156 /// to serve until termination signal; clean shutdown is via process
157 /// exit. Implementers that want graceful shutdown plumb it through
158 /// `State` (e.g. an `AtomicBool` checked between accept loops).
159 fn serve(listener: IpcListener) -> Never;
160}
161
162/// Run the broker-side fast-bind orchestration for a `BrokeredBackend`.
163///
164/// 1. Call `B::bind(endpoint)`. Failure → return the error.
165/// 2. Hand the listener to `B::serve`.
166///
167/// In the full v2 baseline, the broker calls this from inside the
168/// spawned daemon's `main()`. Slice 3c–4 of #488 has the v2 broker
169/// scaffold but does not yet exercise this trait; this function is the
170/// integration seam future slices will use.
171pub fn bootstrap<B: BrokeredBackend>(endpoint: &Endpoint) -> Result<Never, BindError> {
172 let listener = B::bind(endpoint)?;
173 // Lockfile write lands in the v2 broker baseline; once it does,
174 // it goes here, between `bind` and `serve`.
175 let never = B::serve(listener);
176 // `serve` returns `Never` (uninhabited) so this `Ok` is never
177 // constructed. Match makes the unreachability explicit.
178 match never {}
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use interprocess::local_socket::ListenerOptions;

    /// Reference backend used to check that the trait shape compiles:
    /// `bind` creates a real platform listener and `serve` panics, so a
    /// test can tell how far `bootstrap` got.
    struct StubBackend;

    impl BrokeredBackend for StubBackend {
        type State = ();

        fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError> {
            // A real listener is created so the test goes through the
            // actual interprocess API, not only trait dispatch. Each
            // test passes its own suffix in `endpoint`, which keeps
            // tests running in parallel from sharing a name.
            #[cfg(windows)]
            let name = {
                use interprocess::local_socket::{GenericNamespaced, ToNsName};
                let bare = format!("rp-brokered-backend-stub-{endpoint}");
                ToNsName::to_ns_name::<GenericNamespaced>(bare.as_str())?.into_owned()
            };
            #[cfg(unix)]
            let name = {
                use interprocess::local_socket::{GenericFilePath, ToFsName};
                let path = std::env::temp_dir()
                    .join(format!("rp-brokered-backend-stub-{endpoint}.sock"));
                // Best-effort removal of a socket file left by an
                // earlier run; a missing file is not an error.
                let _ = std::fs::remove_file(&path);
                ToFsName::to_fs_name::<GenericFilePath>(path.to_string_lossy().as_ref())?
                    .into_owned()
            };
            ListenerOptions::new()
                .name(name)
                .create_sync()
                .map_err(BindError::from)
        }

        fn serve(_listener: IpcListener) -> Never {
            // The stub "returns" by panicking. A real implementer runs
            // an accept loop forever; with a `Never` return type there
            // is no value it could hand back.
            panic!("StubBackend::serve called");
        }
    }

    /// Conformance test #1 (#497 acceptance): `bind` takes no state
    /// parameter. The check is the trait signature itself — if this
    /// test compiles, the property holds. It stands in for the
    /// `trybuild` UI test named in #497's acceptance criteria, using
    /// the type system instead of a separate harness.
    #[test]
    fn brokered_backend_bind_returns_listener_from_endpoint_only() {
        // Coercing `B::bind` to this exact fn-pointer type fails to
        // compile if the trait method ever gains another parameter,
        // such as a `state` argument.
        type BindFn = fn(&Endpoint) -> Result<IpcListener, BindError>;
        fn bind_of<B: BrokeredBackend>() -> BindFn {
            B::bind
        }
        let _: BindFn = bind_of::<StubBackend>();
    }

    /// Conformance test #3 (#497 acceptance): an implementation whose
    /// `bind` returns a real listener has its endpoint up immediately,
    /// with no `serve` call required.
    #[test]
    fn bind_alone_yields_a_listening_endpoint() {
        let bound = StubBackend::bind("bind-alone");
        // `accept` is the broker's hang-detection probe primitive, but
        // calling it here would block. This test only checks that the
        // daemon-side path built a listener without allocating state.
        let listener = bound.expect("bind succeeds");
        drop(listener);
    }

    /// `bootstrap` runs `bind` first and hands off to `serve` only when
    /// `bind` succeeded. The stub's `serve` panics, so a successful
    /// `bind` followed by that panic demonstrates the ordering.
    #[test]
    fn bootstrap_calls_bind_then_serve() {
        let outcome = std::panic::catch_unwind(|| bootstrap::<StubBackend>("bootstrap-ordering"));
        let payload = outcome.expect_err("bootstrap should reach the serve panic");
        // A panic payload is either a `&str` or a `String`; anything
        // else is treated as an empty message.
        let message: &str = match payload.downcast_ref::<&str>() {
            Some(text) => *text,
            None => payload
                .downcast_ref::<String>()
                .map_or("", String::as_str),
        };
        assert!(
            message.contains("StubBackend::serve called"),
            "expected to reach serve, got panic payload: {message:?}"
        );
    }

    /// A failing `bind` short-circuits before `serve` runs. A dedicated
    /// failing backend is used instead of altering the stub, so the
    /// stub keeps its "real bind succeeds" property for the other tests
    /// in this module.
    #[test]
    fn bootstrap_propagates_bind_failure_without_invoking_serve() {
        struct FailingBackend;

        impl BrokeredBackend for FailingBackend {
            type State = ();

            fn bind(_endpoint: &Endpoint) -> Result<IpcListener, BindError> {
                Err(BindError::Other("synthetic failure".to_owned()))
            }

            fn serve(_listener: IpcListener) -> Never {
                panic!("serve must not run when bind fails");
            }
        }

        let outcome =
            std::panic::catch_unwind(|| bootstrap::<FailingBackend>("bootstrap-failure"));
        // When bind fails the orchestrator returns `Err` and does not
        // panic, so `catch_unwind` reports `Ok(Err(..))`.
        let returned = outcome.expect("bind error should be returned, not a panic");
        match returned {
            Err(BindError::Other(msg)) => assert_eq!(msg, "synthetic failure"),
            other => panic!("expected BindError::Other, got: {other:?}"),
        }
    }
}