running_process/broker/server/handoff/orchestrate_unix.rs
1//! Production-shaped orchestration of one Unix `SCM_RIGHTS` handle-passing
2//! handoff (#354, slice 4).
3//!
4//! Mirrors the Windows sequence in [`super::orchestrate`] with the stages a
5//! Unix handoff actually has. `SCM_RIGHTS` carries the duplicated file
6//! descriptor *inside* the message, so there is no separate "duplicate"
7//! step followed by a "deliver the handle value" step: one
8//! `sendmsg(SCM_RIGHTS)` call both duplicates the descriptor into the
9//! backend and tells the backend about it. The broker-side sequence is:
10//!
11//! 1. send the broker-held connection fd plus the one-time token to the
12//! backend handoff socket ([`super::try_send_scm_rights`]),
13//! 2. wait for the backend acknowledgement observed by a
14//! [`UnixHandoffAckWait`] channel, and
15//! 3. complete the pending entry in the [`HandoffAckRegistry`], consuming
16//! the one-time token exactly once.
17//!
18//! Any failure at any step abandons the handoff: the one-time token is
19//! revoked, the pending ACK entry is removed, and the caller receives
20//! [`UnixHandoffOutcome::FallbackToReconnect`]. The negotiated
21//! `backend_pipe` reconnect path stays authoritative; orchestration
22//! failures are silent optimization failures, never client errors, and
23//! this function never panics on transport, delivery, or registry errors.
24//!
25//! # Descriptor ownership contract
26//!
27//! Unlike the Windows `DuplicateHandle` path there is no cross-process
28//! leak the broker cannot clean up: the broker keeps ownership of its own
29//! `request.fd` at every stage (`SCM_RIGHTS` sends a *duplicate*), so on
30//! fallback the caller may simply close the broker-held descriptor. What
31//! the broker *cannot* undo is a duplicate that already reached the
32//! backend: once the send succeeded, the backend holds its own descriptor
33//! until it closes it. [`UnixHandoffFallback::fd_reached_backend`] records
34//! that honestly so callers can log it instead of pretending the duplicate
35//! was reclaimed.
36
37use std::time::Instant;
38
39use super::ack::HandoffAckRegistry;
40use super::fallback::{HandoffFallbackDecision, HandoffFallbackReason};
41use super::handoff_token::{HandoffToken, HandoffTokenStore};
42use super::orchestrate::HandoffDeliveryError;
43use super::unix::{
44 try_send_scm_rights, ScmRightsAttempt, ScmRightsResult, ScmRightsSuccess, UnixFileDescriptor,
45 UnixHandoffSocket,
46};
47use super::AcknowledgedHandoff;
48
49/// Inputs for one orchestrated Unix `SCM_RIGHTS` handle-passing handoff.
50#[derive(Clone, Debug, PartialEq, Eq)]
51pub struct UnixHandoffRequest {
52 /// Broker-owned connection file descriptor to pass to the backend.
53 pub fd: UnixFileDescriptor,
54 /// Backend handoff socket that should receive the file descriptor.
55 pub backend_socket: UnixHandoffSocket,
56 /// One-time token issued at Hello time and registered for an ACK.
57 pub token: HandoffToken,
58}
59
60impl UnixHandoffRequest {
61 /// Build inputs for one orchestrated handoff.
62 pub fn new(
63 fd: UnixFileDescriptor,
64 backend_socket: UnixHandoffSocket,
65 token: HandoffToken,
66 ) -> Self {
67 Self {
68 fd,
69 backend_socket,
70 token,
71 }
72 }
73}
74
75/// Acknowledgement channel observing the backend's adoption of a
76/// handed-off connection.
77///
78/// `SCM_RIGHTS` already delivers the descriptor and token in one message,
79/// so unlike the Windows [`super::HandoffDelivery`] trait there is no
80/// separate deliver step to abstract — only the ACK wait. Production wire
81/// delivery of the ACK does not exist yet (the v1 envelope reserves no
82/// backend-to-broker control frame); the orchestration treats the wait as
83/// a pluggable step so the sequencing and fallback contract are real
84/// today.
85pub trait UnixHandoffAckWait {
86 /// Block until the backend acknowledges adopting the handed-off
87 /// connection, or until `deadline`.
88 ///
89 /// Returns the instant the acknowledgement was observed. The
90 /// orchestrator still validates that instant against the
91 /// [`HandoffAckRegistry`] deadline registered at issuance, so an ACK
92 /// channel that misjudges the deadline cannot complete an overdue
93 /// handoff.
94 fn await_backend_ack(
95 &mut self,
96 token: &HandoffToken,
97 deadline: Instant,
98 ) -> Result<Instant, HandoffDeliveryError>;
99}
100
/// Orchestration step at which a Unix handoff was given up.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnixHandoffStage {
    /// The `sendmsg(SCM_RIGHTS)` call towards the backend handoff socket
    /// failed.
    Send,
    /// No backend acknowledgement was observed before the deadline.
    AwaitAck,
    /// The acknowledgement was rejected by the ACK registry (it was
    /// overdue, or a sweep had already expired the entry).
    Acknowledge,
}
112
113/// A handoff completed end to end: descriptor sent with its token and
114/// acknowledged before the registry deadline.
115#[derive(Clone, Debug, PartialEq, Eq)]
116pub struct CompletedUnixHandoff {
117 /// Successful `SCM_RIGHTS` send into the backend.
118 pub sent: ScmRightsSuccess,
119 /// Timely backend acknowledgement that consumed the one-time token.
120 pub acknowledged: AcknowledgedHandoff,
121}
122
123/// A handoff abandoned at some orchestration stage.
124///
125/// The one-time token has been revoked and the pending ACK entry removed;
126/// the caller must keep using the negotiated `backend_pipe` reconnect path.
127/// The broker still owns `broker_fd` and may close it.
128#[derive(Clone, Debug, PartialEq, Eq)]
129pub struct UnixHandoffFallback {
130 /// Stage at which the handoff was abandoned.
131 pub stage: UnixHandoffStage,
132 /// Silent reconnect fallback decision for the client-visible contract.
133 pub decision: HandoffFallbackDecision,
134 /// Broker-owned connection descriptor. `SCM_RIGHTS` only ever sends a
135 /// duplicate, so unlike the Windows leak contract the broker retains
136 /// ownership and may close this descriptor now that the handoff is
137 /// abandoned.
138 pub broker_fd: UnixFileDescriptor,
139 /// True when the `SCM_RIGHTS` send already succeeded before the
140 /// failure: the backend holds a duplicated descriptor the broker
141 /// cannot reclaim; it lives until the backend closes it. The revoked
142 /// token guarantees the backend can never *adopt* that connection.
143 pub fd_reached_backend: bool,
144 /// Human-readable failure detail for logs.
145 pub detail: String,
146}
147
148/// Outcome of one orchestrated Unix `SCM_RIGHTS` handle-passing handoff.
149#[derive(Clone, Debug, PartialEq, Eq)]
150pub enum UnixHandoffOutcome {
151 /// The backend adopted the connection before the ACK deadline.
152 Completed(CompletedUnixHandoff),
153 /// The handoff was abandoned; the client reconnects via `backend_pipe`.
154 FallbackToReconnect(UnixHandoffFallback),
155}
156
157impl UnixHandoffOutcome {
158 /// Return true when the handoff completed end to end.
159 pub fn is_completed(&self) -> bool {
160 matches!(self, Self::Completed(_))
161 }
162
163 /// Return the fallback details when the handoff was abandoned.
164 pub fn fallback(&self) -> Option<&UnixHandoffFallback> {
165 match self {
166 Self::Completed(_) => None,
167 Self::FallbackToReconnect(fallback) => Some(fallback),
168 }
169 }
170}
171
172/// Run one production-shaped Unix handoff with the real
173/// `sendmsg(SCM_RIGHTS)` transport.
174///
175/// The token in `request` must have been issued from `tokens` and
176/// registered pending in `acks` (the Hello path does both). On success the
177/// token is consumed exactly once; on any failure it is revoked and the
178/// outcome degrades to the `backend_pipe` reconnect fallback. On non-Unix
179/// targets the transport reports `UnsupportedPlatform` and the outcome is
180/// the same non-panicking fallback.
181pub fn execute_unix_handoff<W>(
182 tokens: &mut HandoffTokenStore,
183 acks: &mut HandoffAckRegistry,
184 request: &UnixHandoffRequest,
185 ack_wait: &mut W,
186) -> UnixHandoffOutcome
187where
188 W: UnixHandoffAckWait + ?Sized,
189{
190 execute_unix_handoff_with_transport(tokens, acks, request, try_send_scm_rights, ack_wait)
191}
192
193/// Run one orchestrated Unix handoff with an explicit send transport.
194///
195/// Platform-neutral tests inject a mock transport here; production callers
196/// use [`execute_unix_handoff`].
197pub fn execute_unix_handoff_with_transport<T, W>(
198 tokens: &mut HandoffTokenStore,
199 acks: &mut HandoffAckRegistry,
200 request: &UnixHandoffRequest,
201 transport: T,
202 ack_wait: &mut W,
203) -> UnixHandoffOutcome
204where
205 T: FnOnce(&ScmRightsAttempt) -> ScmRightsResult,
206 W: UnixHandoffAckWait + ?Sized,
207{
208 let attempt = ScmRightsAttempt::new(request.fd, request.backend_socket.clone(), request.token);
209 let sent = match transport(&attempt) {
210 Ok(success) => success,
211 Err(error) => {
212 acks.abandon(tokens, &request.token);
213 return abandoned(
214 UnixHandoffStage::Send,
215 error.fallback_decision(),
216 request.fd,
217 false,
218 error.to_string(),
219 );
220 }
221 };
222
223 let deadline = ack_deadline_from(acks, Instant::now());
224 let acknowledged_at = match ack_wait.await_backend_ack(&request.token, deadline) {
225 Ok(at) => at,
226 Err(error) => {
227 acks.abandon(tokens, &request.token);
228 return abandoned(
229 UnixHandoffStage::AwaitAck,
230 HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
231 request.fd,
232 true,
233 error.to_string(),
234 );
235 }
236 };
237
238 match acks.acknowledge(tokens, &request.token, acknowledged_at) {
239 Ok(acknowledged) => {
240 UnixHandoffOutcome::Completed(CompletedUnixHandoff { sent, acknowledged })
241 }
242 Err(error) => {
243 // AckDeadlineExceeded already revoked the token; TokenNotPending
244 // means a sweep expired it. Revoke defensively either way so no
245 // error path can leave the one-time token presentable.
246 tokens.revoke(&request.token);
247 abandoned(
248 UnixHandoffStage::Acknowledge,
249 HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
250 request.fd,
251 true,
252 error.to_string(),
253 )
254 }
255 }
256}
257
258fn abandoned(
259 stage: UnixHandoffStage,
260 decision: HandoffFallbackDecision,
261 broker_fd: UnixFileDescriptor,
262 fd_reached_backend: bool,
263 detail: String,
264) -> UnixHandoffOutcome {
265 UnixHandoffOutcome::FallbackToReconnect(UnixHandoffFallback {
266 stage,
267 decision,
268 broker_fd,
269 fd_reached_backend,
270 detail,
271 })
272}
273
274fn ack_deadline_from(acks: &HandoffAckRegistry, now: Instant) -> Instant {
275 now.checked_add(acks.ack_deadline()).unwrap_or(now)
276}