Skip to main content

running_process/broker/
brokered_backend.rs

//! Structurally-enforced fast-bind contract for v2 brokered daemons (#497).
//!
//! ## Why
//!
//! The v1 launcher (`BackendLauncher::probe_with_service`) requires the
//! spawned daemon to answer an IPC probe within
//! `DEFAULT_ENDPOINT_PROBE_TIMEOUT` (~500 ms) after spawn. That budget is
//! hard-coded in the launcher; nothing in the type system prevents a
//! daemon implementer from doing 3 s of state loading inside its
//! bootstrap before the IPC endpoint becomes probe-able. zccache#640
//! and zccache#784 fix this consumer-side; #497 lifts the invariant
//! into a broker-owned contract so future brokered services (fbuild
//! daemon, soldr cache-daemon) cannot silently regress.
//!
//! ## Shape (Option A from #497)
//!
//! ```text
//! bind(&endpoint) -> IpcListener        // SYNC, microseconds, no state access
//!     |
//!     v
//! write_lock_file                       // broker-orchestrated
//!     |
//!     v
//! serve(listener) -> Never              // free to spawn_blocking, take 30s warming
//! ```
//!
//! `bind` is never handed a `State` value, so daemon-state
//! initialization cannot be threaded through it and has to happen in
//! `serve`, after the endpoint is up. That ordering is a compile-time
//! consequence of the trait shape. The signature cannot, by itself,
//! stop an implementation from doing disk I/O or touching globals
//! inside `bind`; "no disk I/O in `bind`" is a documented rule of the
//! contract, not something the compiler checks.
//!
//! Option B (broker-owned bind via inherited file descriptors / named-
//! pipe handles) is a strictly stronger refinement deferred to a
//! follow-up; this slice lands Option A as the minimum viable trait
//! shape so downstream conformance tests + daemon migrations have a
//! stable target.
36
37use std::error::Error;
38use std::fmt;
39
/// Reasons a [`BrokeredBackend::bind`] call can fail.
///
/// Deliberately small. The broker treats every variant identically
/// (declare the spawn dead, surface the error to the operator); the
/// taxonomy exists so daemons can produce useful logs without inventing
/// their own error types.
#[derive(Debug)]
pub enum BindError {
    /// The endpoint string was not a valid platform path / pipe name.
    /// The payload is the human-readable detail shown after the prefix.
    InvalidEndpoint(String),

    /// Another process already holds the endpoint (`EADDRINUSE`,
    /// `ERROR_PIPE_BUSY`, etc.).
    AlreadyBound(String),

    /// Underlying OS error from the bind syscall. This is the only
    /// variant that reports an [`Error::source`].
    Io(std::io::Error),

    /// Catch-all for daemon-specific bind failures (permission
    /// denied via custom security policy, etc.).
    Other(String),
}

impl fmt::Display for BindError {
    /// Writes a single-line message: a fixed, variant-specific prefix
    /// followed by the variant's payload.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidEndpoint(s) => write!(f, "invalid endpoint: {s}"),
            Self::AlreadyBound(s) => write!(f, "endpoint already bound: {s}"),
            Self::Io(e) => write!(f, "bind io error: {e}"),
            Self::Other(s) => write!(f, "bind error: {s}"),
        }
    }
}

impl Error for BindError {
    /// Returns the wrapped [`std::io::Error`] for [`BindError::Io`] and
    /// `None` for the string-carrying variants.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        // Variants are listed explicitly (no `_` arm) so that adding a
        // new variant forces a decision about whether it has a source.
        match self {
            Self::Io(e) => Some(e),
            Self::InvalidEndpoint(_) | Self::AlreadyBound(_) | Self::Other(_) => None,
        }
    }
}

impl From<std::io::Error> for BindError {
    /// Wraps an I/O error as [`BindError::Io`], which lets `bind`
    /// implementations use `?` directly on `std::io::Result` values.
    fn from(e: std::io::Error) -> Self {
        Self::Io(e)
    }
}
88
/// Uninhabited type used as the return type of [`BrokeredBackend::serve`]
/// to express "this function never returns" with an ordinary, nameable
/// type on stable Rust.
///
/// The built-in never type `!` is stable only in function return
/// position (`fn f() -> !`); it cannot yet be used as a general type
/// on stable (for example as a generic argument in `Result<!, E>`).
/// An empty enum has the same uninhabitedness guarantee and can be
/// named anywhere. Implementers cannot construct one — the only way to
/// satisfy a `-> Never` signature is to diverge (loop, panic, exit) —
/// and callers can turn a `Never` value into any type with an empty
/// `match value {}`.
#[derive(Debug)]
pub enum Never {}
99
100/// Cross-platform `IpcListener` placeholder.
101///
102/// Until the v2 broker baseline lands a unified listener wrapper
103/// (subsequent slice of #488), this type alias points at the existing
104/// `interprocess::local_socket::Listener` so daemons can start
105/// implementing [`BrokeredBackend`] today. A typedef shift later
106/// won't break implementers since they construct the listener via
107/// platform-neutral helpers, not by naming the type.
108pub type IpcListener = interprocess::local_socket::Listener;
109
/// Endpoint identifier the broker hands the daemon's [`bind`] method.
///
/// Today this is a plain alias for `str` (matches `ServiceDefinition`'s
/// endpoint field shape), so `&Endpoint` is exactly `&str` and string
/// literals can be passed directly. It is expected to gain structure as
/// the v2 broker baseline grows; code that wants to survive that change
/// should treat the value as an identifier to pass through rather than
/// a string to parse.
///
/// [`bind`]: BrokeredBackend::bind
pub type Endpoint = str;
116
117/// The fast-bind contract a v2 brokered daemon implements.
118///
119/// Trait method ordering matches the orchestration the broker runs
120/// inside [`bootstrap`]:
121///
122/// 1. [`bind`](BrokeredBackend::bind) — claim the kernel resource.
123///    Synchronous, expected to complete in microseconds. **Takes only
124///    the endpoint** — no `&mut self`, no associated-`State` parameter
125///    — so it is structurally impossible to perform daemon-state
126///    initialization before this returns.
127/// 2. (broker-orchestrated) write the lockfile + report spawn success
128///    to the operator.
129/// 3. [`serve`](BrokeredBackend::serve) — accept connections forever.
130///    Free to `spawn_blocking` for arbitrarily slow state loads; the
131///    broker does not observe this. Clients connecting during the
132///    warmup window queue in the OS accept backlog or see whatever
133///    cold-path semantics the daemon defines.
134pub trait BrokeredBackend {
135    /// Daemon-specific state that survives between requests.
136    ///
137    /// Allocated and consumed inside `serve` — never visible to
138    /// `bind`. The trait's structural guarantee is precisely that
139    /// state initialization cannot run before the endpoint is bound.
140    type State: Send + 'static;
141
142    /// Bind the IPC listener. **No state access, no disk I/O.**
143    ///
144    /// The broker enforces hang detection by failing the spawn if this
145    /// does not return (or if the resulting listener is not probe-able)
146    /// within `DEFAULT_ENDPOINT_PROBE_TIMEOUT`.
147    fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError>;
148
149    /// Serve forever on the bound listener.
150    ///
151    /// Free to initialize `State` synchronously, `spawn_blocking` heavy
152    /// loads, or anything else — the broker has already declared the
153    /// daemon "spawned successfully" by this point.
154    ///
155    /// Returns `!` because a brokered daemon's normal control flow is
156    /// to serve until termination signal; clean shutdown is via process
157    /// exit. Implementers that want graceful shutdown plumb it through
158    /// `State` (e.g. an `AtomicBool` checked between accept loops).
159    fn serve(listener: IpcListener) -> Never;
160}
161
162/// Run the broker-side fast-bind orchestration for a `BrokeredBackend`.
163///
164/// 1. Call `B::bind(endpoint)`. Failure → propagate the [`BindError`].
165/// 2. Hand the listener to `B::serve`, which never returns.
166///
167/// The function's signature is `Result<(), BindError>` rather than
168/// `Result<Never, …>` so callers don't have to spell `Never` in their
169/// return type just to call `bootstrap`. The body still proves
170/// divergence: `B::serve(listener)` returns the uninhabited [`Never`],
171/// which coerces to `()` via Rust's never-type coercion — control
172/// flow that reaches the end of the body would require constructing
173/// a `Never` value, which the type system forbids.
174///
175/// In the full v2 baseline, the broker calls this from inside the
176/// spawned daemon's `main()`. Slice 3c–4 of #488 has the v2 broker
177/// scaffold but does not yet exercise this trait; this function is the
178/// integration seam future slices will use.
179// `match B::serve(listener) {}` is the documented pattern for proving
180// divergence via an uninhabited type, but rustc's `unreachable_code`
181// lint still flags the match itself because flow analysis types
182// `B::serve` as `Never`. The lint help text recommends precisely this
183// `#[allow]` for the case.
184#[allow(unreachable_code)]
185pub fn bootstrap<B: BrokeredBackend>(endpoint: &Endpoint) -> Result<(), BindError> {
186    let listener = B::bind(endpoint)?;
187    // Lockfile write lands in the v2 broker baseline; once it does,
188    // it goes here, between `bind` and `serve`.
189    match B::serve(listener) {}
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use interprocess::local_socket::ListenerOptions;

    /// Reference implementation used to verify the trait shape compiles
    /// and that `bind` can produce a real platform listener.
    struct StubBackend;

    impl BrokeredBackend for StubBackend {
        type State = ();

        fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError> {
            // Bind a real platform listener at a unique test name so the
            // path exercises the actual interprocess API surface, not
            // just trait dispatch. The caller passes a per-test
            // suffix via `endpoint` so parallel cargo-test runs don't
            // collide on the same name.
            #[cfg(windows)]
            let name = {
                use interprocess::local_socket::{GenericNamespaced, ToNsName};
                let bare = format!("rp-brokered-backend-stub-{endpoint}");
                // Name-conversion failures are `io::Error`s; `?` turns
                // them into `BindError::Io` via the `From` impl.
                ToNsName::to_ns_name::<GenericNamespaced>(bare.as_str())?
                    .into_owned()
            };
            #[cfg(unix)]
            let name = {
                use interprocess::local_socket::{GenericFilePath, ToFsName};
                let path = std::env::temp_dir()
                    .join(format!("rp-brokered-backend-stub-{endpoint}.sock"));
                // Test-only hygiene: clear a socket file left behind by
                // a previous run so the bind below does not fail. The
                // result is ignored because the file usually does not
                // exist. (A production `bind` is expected to avoid disk
                // I/O; this stub is not a model for that rule.)
                let _ = std::fs::remove_file(&path);
                ToFsName::to_fs_name::<GenericFilePath>(
                    path.to_string_lossy().as_ref(),
                )?
                .into_owned()
            };
            let listener = ListenerOptions::new().name(name).create_sync()?;
            Ok(listener)
        }

        fn serve(_listener: IpcListener) -> Never {
            // Reference impl diverges by panicking so tests can observe
            // that `serve` was reached. Real implementers run an accept
            // loop and never return; the `Never` return type means
            // there is no `return` value they could construct.
            panic!("StubBackend::serve called");
        }
    }

    /// Conformance test #1 (#497 acceptance): the `bind` method has no
    /// state parameter. Verified by the trait signature itself — if
    /// this test compiles, the property holds. Equivalent to the
    /// `trybuild` UI test in #497's acceptance criteria, expressed via
    /// the type system rather than a separate harness.
    #[test]
    fn brokered_backend_bind_returns_listener_from_endpoint_only() {
        // Coercing `B::bind` to this exact function-pointer type pins
        // the signature: if a future revision of the trait added a
        // `state` parameter (or a receiver), this would fail to compile.
        fn _shape_check<B: BrokeredBackend>() -> fn(&Endpoint) -> Result<IpcListener, BindError> {
            B::bind
        }
        let _ = _shape_check::<StubBackend>();
    }

    /// Conformance test #3 (#497 acceptance): an implementation that
    /// returns an actual listener from `bind` produces a listening
    /// endpoint immediately (no `serve` call required).
    #[test]
    fn bind_alone_yields_a_listening_endpoint() {
        let listener = StubBackend::bind("bind-alone").expect("bind succeeds");
        // The listener's `accept` is the broker's hang-detection probe
        // primitive. We don't call `accept` here (would block), and no
        // client connects either; this only verifies that the
        // daemon-side code path constructed a listener without any
        // state allocation.
        drop(listener);
    }

    /// `bootstrap` orchestration calls `bind` first; only if `bind`
    /// succeeds does it hand off to `serve`. With this stub, `serve`
    /// panics — so a successful `bind` followed by the panic proves
    /// the orchestration ordering.
    #[test]
    fn bootstrap_calls_bind_then_serve() {
        let result = std::panic::catch_unwind(|| bootstrap::<StubBackend>("bootstrap-ordering"));
        let payload = result.expect_err("bootstrap should reach the serve panic");
        // A panic payload is `&'static str` for literal messages and
        // `String` for formatted ones; accept either.
        let message = payload
            .downcast_ref::<&str>()
            .copied()
            .or_else(|| payload.downcast_ref::<String>().map(String::as_str))
            .unwrap_or("");
        assert!(
            message.contains("StubBackend::serve called"),
            "expected to reach serve, got panic payload: {message:?}"
        );
    }

    /// A `bind` failure short-circuits before `serve` runs. Uses a
    /// custom failing backend rather than tweaking the stub so the
    /// stub's "real bind succeeds" property stays intact for the other
    /// tests in this module.
    #[test]
    fn bootstrap_propagates_bind_failure_without_invoking_serve() {
        struct FailingBackend;
        impl BrokeredBackend for FailingBackend {
            type State = ();
            fn bind(_endpoint: &Endpoint) -> Result<IpcListener, BindError> {
                Err(BindError::Other("synthetic failure".into()))
            }
            fn serve(_listener: IpcListener) -> Never {
                // Reaching this would surface as a panic, which the
                // `expect` below reports as a test failure.
                panic!("serve must not run when bind fails");
            }
        }

        let result =
            std::panic::catch_unwind(|| bootstrap::<FailingBackend>("bootstrap-failure"));
        // The orchestrator returns Err — never panics — when bind fails.
        // catch_unwind preserves that as Ok(Err(...)).
        let inner = result.expect("bind error should be returned, not a panic");
        match inner {
            Err(BindError::Other(msg)) => assert_eq!(msg, "synthetic failure"),
            other => panic!("expected BindError::Other, got: {other:?}"),
        }
    }
}