Skip to main content

running_process/broker/
brokered_backend.rs

1//! Structurally-enforced fast-bind contract for v2 brokered daemons (#497).
2//!
3//! ## Why
4//!
5//! The v1 launcher (`BackendLauncher::probe_with_service`) requires the
6//! spawned daemon to answer an IPC probe within
7//! `DEFAULT_ENDPOINT_PROBE_TIMEOUT` (~500 ms) after spawn. That budget is
8//! hard-coded in the launcher; nothing in the type system prevents a
9//! daemon implementer from doing 3 s of state loading inside its
10//! bootstrap before the IPC endpoint becomes probe-able. zccache#640
11//! and zccache#784 fix this consumer-side; #497 lifts the invariant
12//! into a broker-owned contract so future brokered services (fbuild
13//! daemon, soldr cache-daemon) cannot silently regress.
14//!
15//! ## Shape (Option A from #497)
16//!
17//! ```text
18//! bind(&endpoint) -> IpcListener        // SYNC, microseconds, no state access
19//!     |
20//!     v
21//! write_lock_file                       // broker-orchestrated
22//!     |
23//!     v
24//! serve(listener) -> !                  // free to spawn_blocking, take 30s warming
25//! ```
26//!
//! `bind` has no access to `State` — the daemon cannot load or touch
//! its state before the endpoint is up (the signature does not forbid
//! unrelated I/O, but there is no state for `bind` to warm). The
//! fast-bind property becomes a compile-time consequence of the trait
//! shape.
30//!
31//! Option B (broker-owned bind via inherited file descriptors / named-
32//! pipe handles) is a strictly stronger refinement deferred to a
33//! follow-up; this slice lands Option A as the minimum viable trait
34//! shape so downstream conformance tests + daemon migrations have a
35//! stable target.
36
37use std::error::Error;
38use std::fmt;
39
/// Reasons a [`BrokeredBackend::bind`] call can fail.
///
/// Deliberately small. The broker treats every variant identically
/// (declare the spawn dead, surface the error to the operator); the
/// taxonomy exists so daemons can produce useful logs without inventing
/// their own error types.
///
/// Only [`BindError::Io`] wraps an underlying error value: it is what
/// `?` produces from a `std::io::Error` (via this type's `From` impl)
/// and it is the only variant for which [`Error::source`] returns
/// `Some`. The other variants carry a free-form `String` that the
/// `Display` impl prints after a fixed per-variant prefix.
#[derive(Debug)]
pub enum BindError {
    /// The endpoint string was not a valid platform path / pipe name.
    /// Displayed as `invalid endpoint: <payload>`.
    InvalidEndpoint(String),

    /// Another process already holds the endpoint (`EADDRINUSE`,
    /// `ERROR_PIPE_BUSY`, etc.). Displayed as
    /// `endpoint already bound: <payload>`.
    AlreadyBound(String),

    /// Underlying OS error from the bind syscall. Displayed as
    /// `bind io error: <error>`; the wrapped error is also exposed
    /// through [`Error::source`].
    Io(std::io::Error),

    /// Catch-all for daemon-specific bind failures (permission
    /// denied via custom security policy, etc.). Displayed as
    /// `bind error: <payload>`.
    Other(String),
}
62
63impl fmt::Display for BindError {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        match self {
66            Self::InvalidEndpoint(s) => write!(f, "invalid endpoint: {s}"),
67            Self::AlreadyBound(s) => write!(f, "endpoint already bound: {s}"),
68            Self::Io(e) => write!(f, "bind io error: {e}"),
69            Self::Other(s) => write!(f, "bind error: {s}"),
70        }
71    }
72}
73
74impl Error for BindError {
75    fn source(&self) -> Option<&(dyn Error + 'static)> {
76        match self {
77            Self::Io(e) => Some(e),
78            _ => None,
79        }
80    }
81}
82
83impl From<std::io::Error> for BindError {
84    fn from(e: std::io::Error) -> Self {
85        Self::Io(e)
86    }
87}
88
/// Uninhabited type used as the return type of [`BrokeredBackend::serve`]
/// and as the `Ok` type of [`bootstrap`]'s result, to express "this
/// function never returns normally" on stable Rust.
///
/// A bare `-> !` return type is accepted on stable, but `!` is not yet
/// stable as a general-purpose type — for example it cannot be written
/// as the `Ok` side of [`bootstrap`]'s `Result<Never, BindError>`. An
/// empty enum has the same uninhabitedness guarantee and compiles on
/// stable. Implementers cannot construct one — the only way to satisfy
/// the signature is to diverge (loop, panic, exit).
///
/// Code that receives a `Never` can discharge it with an empty match
/// (`match never {}`), which is how [`bootstrap`] handles the value
/// returned by `serve`.
#[derive(Debug)]
pub enum Never {}
99
/// Cross-platform `IpcListener` placeholder.
///
/// Until the v2 broker baseline lands a unified listener wrapper
/// (subsequent slice of #488), this type alias points at the existing
/// `interprocess::local_socket::Listener` so daemons can start
/// implementing [`BrokeredBackend`] today. Re-pointing the alias later
/// is not expected to break implementers, provided they construct the
/// listener via platform-neutral helpers rather than by naming the
/// concrete type.
///
/// This is the value [`BrokeredBackend::bind`] returns and
/// [`BrokeredBackend::serve`] consumes.
pub type IpcListener = interprocess::local_socket::Listener;
109
/// Opaque endpoint identifier the broker hands the daemon's [`bind`]
/// method. Today a plain string (matches `ServiceDefinition`'s endpoint
/// field shape); will gain structure as the v2 broker baseline grows.
///
/// Because the alias currently resolves to the unsized `str`, it is
/// always used behind a reference (`&Endpoint`, i.e. `&str`), as in the
/// signatures of [`bind`] and [`bootstrap`].
///
/// [`bind`]: BrokeredBackend::bind
pub type Endpoint = str;
116
/// The fast-bind contract a v2 brokered daemon implements.
///
/// Both methods are associated functions without a receiver: there is
/// no `self`, so an implementing type is never instantiated by this
/// trait.
///
/// Trait method ordering matches the orchestration the broker runs
/// inside [`bootstrap`]:
///
/// 1. [`bind`](BrokeredBackend::bind) — claim the kernel resource.
///    Synchronous, expected to complete in microseconds. **Takes only
///    the endpoint** — no `&mut self`, no associated-`State` parameter
///    — so it is structurally impossible to perform daemon-state
///    initialization before this returns.
/// 2. (broker-orchestrated) write the lockfile + report spawn success
///    to the operator.
/// 3. [`serve`](BrokeredBackend::serve) — accept connections forever.
///    Free to `spawn_blocking` for arbitrarily slow state loads; the
///    broker does not observe this. Clients connecting during the
///    warmup window queue in the OS accept backlog or see whatever
///    cold-path semantics the daemon defines.
pub trait BrokeredBackend {
    /// Daemon-specific state that survives between requests.
    ///
    /// Allocated and consumed inside [`serve`](Self::serve) — never
    /// visible to [`bind`](Self::bind). The trait's structural
    /// guarantee is precisely that state initialization cannot run
    /// before the endpoint is bound.
    ///
    /// Neither method signature mentions this type; it must be
    /// `Send + 'static`.
    type State: Send + 'static;

    /// Bind the IPC listener. **No state access, no disk I/O.**
    ///
    /// The broker enforces hang detection by failing the spawn if this
    /// does not return (or if the resulting listener is not probe-able)
    /// within `DEFAULT_ENDPOINT_PROBE_TIMEOUT`. That enforcement is on
    /// the broker side; [`bootstrap`] in this module applies no timeout
    /// of its own.
    ///
    /// # Errors
    ///
    /// Returns a [`BindError`] describing why the endpoint could not be
    /// claimed; [`bootstrap`] propagates it without calling
    /// [`serve`](Self::serve).
    fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError>;

    /// Serve forever on the bound listener.
    ///
    /// Free to initialize `State` synchronously, `spawn_blocking` heavy
    /// loads, or anything else — the broker has already declared the
    /// daemon "spawned successfully" by this point.
    ///
    /// Returns [`Never`] (an uninhabited type, so the function cannot
    /// return normally) because a brokered daemon's normal control flow
    /// is to serve until termination signal; clean shutdown is via
    /// process exit. Implementers that want graceful shutdown plumb it
    /// through `State` (e.g. an `AtomicBool` checked between accept
    /// loops).
    fn serve(listener: IpcListener) -> Never;
}
161
162/// Run the broker-side fast-bind orchestration for a `BrokeredBackend`.
163///
164/// 1. Call `B::bind(endpoint)`. Failure → return the error.
165/// 2. Hand the listener to `B::serve`.
166///
167/// In the full v2 baseline, the broker calls this from inside the
168/// spawned daemon's `main()`. Slice 3c–4 of #488 has the v2 broker
169/// scaffold but does not yet exercise this trait; this function is the
170/// integration seam future slices will use.
171pub fn bootstrap<B: BrokeredBackend>(endpoint: &Endpoint) -> Result<Never, BindError> {
172    let listener = B::bind(endpoint)?;
173    // Lockfile write lands in the v2 broker baseline; once it does,
174    // it goes here, between `bind` and `serve`.
175    let never = B::serve(listener);
176    // `serve` returns `Never` (uninhabited) so this `Ok` is never
177    // constructed. Match makes the unreachability explicit.
178    match never {}
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use interprocess::local_socket::ListenerOptions;

    /// Minimal backend used to check that the trait shape compiles:
    /// `bind` creates a real platform listener, `serve` panics with a
    /// recognizable message.
    struct StubBackend;

    impl BrokeredBackend for StubBackend {
        type State = ();

        fn bind(endpoint: &Endpoint) -> Result<IpcListener, BindError> {
            // A real listener is created (rather than a fake) so the
            // test goes through the actual interprocess API surface and
            // not only trait dispatch. `endpoint` acts as a per-test
            // suffix so tests running in parallel do not collide on the
            // same name.
            #[cfg(windows)]
            let name = {
                use interprocess::local_socket::{GenericNamespaced, ToNsName};
                let pipe_name = format!("rp-brokered-backend-stub-{endpoint}");
                ToNsName::to_ns_name::<GenericNamespaced>(pipe_name.as_str())?
                    .into_owned()
            };
            #[cfg(unix)]
            let name = {
                use interprocess::local_socket::{GenericFilePath, ToFsName};
                let socket_path = std::env::temp_dir()
                    .join(format!("rp-brokered-backend-stub-{endpoint}.sock"));
                // Best-effort removal; the result is deliberately
                // ignored (presumably to clear a stale socket file from
                // an earlier run — the file may simply not exist).
                let _ = std::fs::remove_file(&socket_path);
                ToFsName::to_fs_name::<GenericFilePath>(
                    socket_path.to_string_lossy().as_ref(),
                )?
                .into_owned()
            };
            Ok(ListenerOptions::new().name(name).create_sync()?)
        }

        fn serve(_listener: IpcListener) -> Never {
            // The stub "serves" by panicking. A real implementer runs
            // an accept loop and never returns; since `Never` has no
            // values, diverging is the only way to satisfy the
            // signature.
            panic!("StubBackend::serve called");
        }
    }

    /// Conformance test #1 (#497 acceptance): `bind` takes the endpoint
    /// and nothing else. The check is the type annotation itself — if
    /// this compiles, the property holds. Stands in for the `trybuild`
    /// UI test from #497's acceptance criteria, using the type system
    /// instead of a separate harness.
    #[test]
    fn brokered_backend_bind_returns_listener_from_endpoint_only() {
        // The exact function-pointer type `bind` must coerce to. Adding
        // a `state` parameter to the trait method would make the
        // coercion below fail to compile.
        type BindFn = fn(&Endpoint) -> Result<IpcListener, BindError>;

        fn bind_as_fn_pointer<B: BrokeredBackend>() -> BindFn {
            B::bind
        }

        let _bind_fn: BindFn = bind_as_fn_pointer::<StubBackend>();
    }

    /// Conformance test #3 (#497 acceptance): `bind` on its own yields
    /// a constructed listener — no `serve` call is involved.
    #[test]
    fn bind_alone_yields_a_listening_endpoint() {
        let bound = StubBackend::bind("bind-alone");
        let listener = bound.expect("bind succeeds");
        // `accept` is the broker's hang-detection probe primitive, but
        // calling it here would block. The test only checks that the
        // daemon-side code path constructed a listener without any
        // state allocation.
        drop(listener);
    }

    /// `bootstrap` runs `bind` first and hands off to `serve` only when
    /// `bind` succeeded. The stub's `serve` panics, so observing that
    /// specific panic after a successful `bind` proves the ordering.
    #[test]
    fn bootstrap_calls_bind_then_serve() {
        let outcome = std::panic::catch_unwind(|| bootstrap::<StubBackend>("bootstrap-ordering"));
        let payload = outcome.expect_err("bootstrap should reach the serve panic");
        // Panic payloads are either `&'static str` or `String`
        // depending on how `panic!` was invoked; accept both.
        let message: &str = if let Some(text) = payload.downcast_ref::<&str>() {
            *text
        } else if let Some(text) = payload.downcast_ref::<String>() {
            text.as_str()
        } else {
            ""
        };
        assert!(
            message.contains("StubBackend::serve called"),
            "expected to reach serve, got panic payload: {message:?}"
        );
    }

    /// A failing `bind` short-circuits before `serve` runs. A dedicated
    /// failing backend is used instead of altering the stub, so the
    /// stub keeps its "real bind succeeds" property for the other tests
    /// in this module.
    #[test]
    fn bootstrap_propagates_bind_failure_without_invoking_serve() {
        struct FailingBackend;

        impl BrokeredBackend for FailingBackend {
            type State = ();

            fn bind(_endpoint: &Endpoint) -> Result<IpcListener, BindError> {
                Err(BindError::Other("synthetic failure".into()))
            }

            fn serve(_listener: IpcListener) -> Never {
                panic!("serve must not run when bind fails");
            }
        }

        let caught =
            std::panic::catch_unwind(|| bootstrap::<FailingBackend>("bootstrap-failure"));
        // When bind fails the orchestrator returns `Err` rather than
        // panicking, which `catch_unwind` surfaces as `Ok(Err(...))`.
        let inner = caught.expect("bind error should be returned, not a panic");
        if let Err(BindError::Other(msg)) = &inner {
            assert_eq!(msg.as_str(), "synthetic failure");
        } else {
            panic!("expected BindError::Other, got: {inner:?}");
        }
    }
}