Skip to main content

running_process/broker/server/handoff/
orchestrate_unix.rs

1//! Production-shaped orchestration of one Unix `SCM_RIGHTS` handle-passing
2//! handoff (#354, slice 4).
3//!
4//! Mirrors the Windows sequence in [`super::orchestrate`] with the stages a
5//! Unix handoff actually has. `SCM_RIGHTS` carries the duplicated file
6//! descriptor *inside* the message, so there is no separate "duplicate"
7//! step followed by a "deliver the handle value" step: one
8//! `sendmsg(SCM_RIGHTS)` call both duplicates the descriptor into the
9//! backend and tells the backend about it. The broker-side sequence is:
10//!
11//! 1. send the broker-held connection fd plus the one-time token to the
12//!    backend handoff socket ([`super::try_send_scm_rights`]),
13//! 2. wait for the backend acknowledgement observed by a
14//!    [`UnixHandoffAckWait`] channel, and
15//! 3. complete the pending entry in the [`HandoffAckRegistry`], consuming
16//!    the one-time token exactly once.
17//!
18//! Any failure at any step abandons the handoff: the one-time token is
19//! revoked, the pending ACK entry is removed, and the caller receives
20//! [`UnixHandoffOutcome::FallbackToReconnect`]. The negotiated
21//! `backend_pipe` reconnect path stays authoritative; orchestration
22//! failures are silent optimization failures, never client errors, and
23//! this function never panics on transport, delivery, or registry errors.
24//!
25//! # Descriptor ownership contract
26//!
27//! Unlike the Windows `DuplicateHandle` path there is no cross-process
28//! leak the broker cannot clean up: the broker keeps ownership of its own
29//! `request.fd` at every stage (`SCM_RIGHTS` sends a *duplicate*), so on
30//! fallback the caller may simply close the broker-held descriptor. What
31//! the broker *cannot* undo is a duplicate that already reached the
32//! backend: once the send succeeded, the backend holds its own descriptor
33//! until it closes it. [`UnixHandoffFallback::fd_reached_backend`] records
34//! that honestly so callers can log it instead of pretending the duplicate
35//! was reclaimed.
36
37use std::time::Instant;
38
39use super::ack::HandoffAckRegistry;
40use super::fallback::{HandoffFallbackDecision, HandoffFallbackReason};
41use super::handoff_token::{HandoffToken, HandoffTokenStore};
42use super::orchestrate::HandoffDeliveryError;
43use super::unix::{
44    try_send_scm_rights, ScmRightsAttempt, ScmRightsResult, ScmRightsSuccess, UnixFileDescriptor,
45    UnixHandoffSocket,
46};
47use super::AcknowledgedHandoff;
48
49/// Inputs for one orchestrated Unix `SCM_RIGHTS` handle-passing handoff.
50#[derive(Clone, Debug, PartialEq, Eq)]
51pub struct UnixHandoffRequest {
52    /// Broker-owned connection file descriptor to pass to the backend.
53    pub fd: UnixFileDescriptor,
54    /// Backend handoff socket that should receive the file descriptor.
55    pub backend_socket: UnixHandoffSocket,
56    /// One-time token issued at Hello time and registered for an ACK.
57    pub token: HandoffToken,
58}
59
60impl UnixHandoffRequest {
61    /// Build inputs for one orchestrated handoff.
62    pub fn new(
63        fd: UnixFileDescriptor,
64        backend_socket: UnixHandoffSocket,
65        token: HandoffToken,
66    ) -> Self {
67        Self {
68            fd,
69            backend_socket,
70            token,
71        }
72    }
73}
74
75/// Acknowledgement channel observing the backend's adoption of a
76/// handed-off connection.
77///
78/// `SCM_RIGHTS` already delivers the descriptor and token in one message,
79/// so unlike the Windows [`super::HandoffDelivery`] trait there is no
80/// separate deliver step to abstract — only the ACK wait. Production wire
81/// delivery of the ACK does not exist yet (the v1 envelope reserves no
82/// backend-to-broker control frame); the orchestration treats the wait as
83/// a pluggable step so the sequencing and fallback contract are real
84/// today.
85pub trait UnixHandoffAckWait {
86    /// Block until the backend acknowledges adopting the handed-off
87    /// connection, or until `deadline`.
88    ///
89    /// Returns the instant the acknowledgement was observed. The
90    /// orchestrator still validates that instant against the
91    /// [`HandoffAckRegistry`] deadline registered at issuance, so an ACK
92    /// channel that misjudges the deadline cannot complete an overdue
93    /// handoff.
94    fn await_backend_ack(
95        &mut self,
96        token: &HandoffToken,
97        deadline: Instant,
98    ) -> Result<Instant, HandoffDeliveryError>;
99}
100
/// Identifies which orchestration step a Unix handoff was abandoned at.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnixHandoffStage {
    /// The `sendmsg(SCM_RIGHTS)` call to the backend handoff socket did
    /// not succeed.
    Send,
    /// No backend acknowledgement was observed before the deadline.
    AwaitAck,
    /// The acknowledgement was rejected by the ACK registry, either
    /// because it was overdue or because a sweep had already expired the
    /// entry.
    Acknowledge,
}
112
113/// A handoff completed end to end: descriptor sent with its token and
114/// acknowledged before the registry deadline.
115#[derive(Clone, Debug, PartialEq, Eq)]
116pub struct CompletedUnixHandoff {
117    /// Successful `SCM_RIGHTS` send into the backend.
118    pub sent: ScmRightsSuccess,
119    /// Timely backend acknowledgement that consumed the one-time token.
120    pub acknowledged: AcknowledgedHandoff,
121}
122
123/// A handoff abandoned at some orchestration stage.
124///
125/// The one-time token has been revoked and the pending ACK entry removed;
126/// the caller must keep using the negotiated `backend_pipe` reconnect path.
127/// The broker still owns `broker_fd` and may close it.
128#[derive(Clone, Debug, PartialEq, Eq)]
129pub struct UnixHandoffFallback {
130    /// Stage at which the handoff was abandoned.
131    pub stage: UnixHandoffStage,
132    /// Silent reconnect fallback decision for the client-visible contract.
133    pub decision: HandoffFallbackDecision,
134    /// Broker-owned connection descriptor. `SCM_RIGHTS` only ever sends a
135    /// duplicate, so unlike the Windows leak contract the broker retains
136    /// ownership and may close this descriptor now that the handoff is
137    /// abandoned.
138    pub broker_fd: UnixFileDescriptor,
139    /// True when the `SCM_RIGHTS` send already succeeded before the
140    /// failure: the backend holds a duplicated descriptor the broker
141    /// cannot reclaim; it lives until the backend closes it. The revoked
142    /// token guarantees the backend can never *adopt* that connection.
143    pub fd_reached_backend: bool,
144    /// Human-readable failure detail for logs.
145    pub detail: String,
146}
147
148/// Outcome of one orchestrated Unix `SCM_RIGHTS` handle-passing handoff.
149#[derive(Clone, Debug, PartialEq, Eq)]
150pub enum UnixHandoffOutcome {
151    /// The backend adopted the connection before the ACK deadline.
152    Completed(CompletedUnixHandoff),
153    /// The handoff was abandoned; the client reconnects via `backend_pipe`.
154    FallbackToReconnect(UnixHandoffFallback),
155}
156
157impl UnixHandoffOutcome {
158    /// Return true when the handoff completed end to end.
159    pub fn is_completed(&self) -> bool {
160        matches!(self, Self::Completed(_))
161    }
162
163    /// Return the fallback details when the handoff was abandoned.
164    pub fn fallback(&self) -> Option<&UnixHandoffFallback> {
165        match self {
166            Self::Completed(_) => None,
167            Self::FallbackToReconnect(fallback) => Some(fallback),
168        }
169    }
170}
171
172/// Run one production-shaped Unix handoff with the real
173/// `sendmsg(SCM_RIGHTS)` transport.
174///
175/// The token in `request` must have been issued from `tokens` and
176/// registered pending in `acks` (the Hello path does both). On success the
177/// token is consumed exactly once; on any failure it is revoked and the
178/// outcome degrades to the `backend_pipe` reconnect fallback. On non-Unix
179/// targets the transport reports `UnsupportedPlatform` and the outcome is
180/// the same non-panicking fallback.
181pub fn execute_unix_handoff<W>(
182    tokens: &mut HandoffTokenStore,
183    acks: &mut HandoffAckRegistry,
184    request: &UnixHandoffRequest,
185    ack_wait: &mut W,
186) -> UnixHandoffOutcome
187where
188    W: UnixHandoffAckWait + ?Sized,
189{
190    execute_unix_handoff_with_transport(tokens, acks, request, try_send_scm_rights, ack_wait)
191}
192
193/// Run one orchestrated Unix handoff with an explicit send transport.
194///
195/// Platform-neutral tests inject a mock transport here; production callers
196/// use [`execute_unix_handoff`].
197pub fn execute_unix_handoff_with_transport<T, W>(
198    tokens: &mut HandoffTokenStore,
199    acks: &mut HandoffAckRegistry,
200    request: &UnixHandoffRequest,
201    transport: T,
202    ack_wait: &mut W,
203) -> UnixHandoffOutcome
204where
205    T: FnOnce(&ScmRightsAttempt) -> ScmRightsResult,
206    W: UnixHandoffAckWait + ?Sized,
207{
208    let attempt = ScmRightsAttempt::new(request.fd, request.backend_socket.clone(), request.token);
209    let sent = match transport(&attempt) {
210        Ok(success) => success,
211        Err(error) => {
212            acks.abandon(tokens, &request.token);
213            return abandoned(
214                UnixHandoffStage::Send,
215                error.fallback_decision(),
216                request.fd,
217                false,
218                error.to_string(),
219            );
220        }
221    };
222
223    let deadline = ack_deadline_from(acks, Instant::now());
224    let acknowledged_at = match ack_wait.await_backend_ack(&request.token, deadline) {
225        Ok(at) => at,
226        Err(error) => {
227            acks.abandon(tokens, &request.token);
228            return abandoned(
229                UnixHandoffStage::AwaitAck,
230                HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
231                request.fd,
232                true,
233                error.to_string(),
234            );
235        }
236    };
237
238    match acks.acknowledge(tokens, &request.token, acknowledged_at) {
239        Ok(acknowledged) => {
240            UnixHandoffOutcome::Completed(CompletedUnixHandoff { sent, acknowledged })
241        }
242        Err(error) => {
243            // AckDeadlineExceeded already revoked the token; TokenNotPending
244            // means a sweep expired it. Revoke defensively either way so no
245            // error path can leave the one-time token presentable.
246            tokens.revoke(&request.token);
247            abandoned(
248                UnixHandoffStage::Acknowledge,
249                HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
250                request.fd,
251                true,
252                error.to_string(),
253            )
254        }
255    }
256}
257
258fn abandoned(
259    stage: UnixHandoffStage,
260    decision: HandoffFallbackDecision,
261    broker_fd: UnixFileDescriptor,
262    fd_reached_backend: bool,
263    detail: String,
264) -> UnixHandoffOutcome {
265    UnixHandoffOutcome::FallbackToReconnect(UnixHandoffFallback {
266        stage,
267        decision,
268        broker_fd,
269        fd_reached_backend,
270        detail,
271    })
272}
273
274fn ack_deadline_from(acks: &HandoffAckRegistry, now: Instant) -> Instant {
275    now.checked_add(acks.ack_deadline()).unwrap_or(now)
276}