running_process/broker/server/handoff/orchestrate.rs
1//! Production-shaped orchestration of one Windows handle-passing handoff
2//! (#354, slice 3).
3//!
4//! This module composes the pieces landed by earlier slices into one
5//! broker-side sequence:
6//!
7//! 1. duplicate the broker-held client pipe into the verified backend
8//! process ([`super::try_duplicate_handle`], or the verified
9//! [`BackendHandle`](crate::broker::backend_handle::BackendHandle)
10//! bridge),
11//! 2. deliver the duplicated handle value plus the one-time token to the
12//! backend through a [`HandoffDelivery`] implementation,
13//! 3. wait for the backend acknowledgement observed by the delivery
14//! channel, and
15//! 4. complete the pending entry in the [`HandoffAckRegistry`], consuming
16//! the one-time token exactly once.
17//!
18//! Any failure at any step abandons the handoff: the one-time token is
19//! revoked, the pending ACK entry is removed, and the caller receives
20//! [`WindowsHandoffOutcome::FallbackToReconnect`]. The negotiated
21//! `backend_pipe` reconnect path stays authoritative; orchestration
22//! failures are silent optimization failures, never client errors, and
23//! this function never panics on transport, delivery, or registry errors.
24//!
25//! # Delivery mechanism
26//!
27//! Delivery of the `(handle value, token)` pair is abstracted behind the
28//! [`HandoffDelivery`] trait so the orchestration sequence, token
29//! lifecycle, and fallback contract stay transport-agnostic. The
30//! production implementation is
31//! [`WireHandoffDelivery`](super::wire::WireHandoffDelivery) (#354 slice
32//! 6), which sends a `HandoffOffer` frame over a framed broker↔backend
33//! control connection and waits for the matching `HandoffAck`; tests also
34//! deliver over the child-helper stdin/stdout protocol from the #358/#363
35//! smoke tests.
36//!
37//! # Handle leak contract
38//!
39//! `DuplicateHandle` places the duplicated handle directly into the
40//! *backend's* handle table. Once duplication has succeeded, the broker
41//! cannot close that handle: closing a handle owned by another process
42//! would require a second `DUPLICATE_CLOSE_SOURCE` round-trip that is not
43//! part of this slice. If delivery or acknowledgement fails after
44//! duplication, the duplicated handle therefore leaks in the backend
45//! process until the backend exits. The outcome records it in
46//! [`WindowsHandoffFallback::leaked_backend_handle`] so callers can log
47//! and monitor the leak honestly instead of pretending cleanup happened.
48
49use std::time::Instant;
50
51use super::ack::{HandoffAckRegistry, PendingHandoffBackend};
52use super::fallback::{HandoffFallbackDecision, HandoffFallbackReason};
53use super::handoff_token::{HandoffToken, HandoffTokenStore};
54use super::windows::{
55 try_duplicate_handle, DuplicateHandleAttempt, DuplicateHandleResult, DuplicateHandleSuccess,
56 WindowsHandleValue,
57};
58use super::AcknowledgedHandoff;
59
/// Inputs for one orchestrated Windows handle-passing handoff.
///
/// The request is a small `Copy` value. The orchestrator takes it by
/// reference and copies each field into the duplication attempt it builds.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct WindowsHandoffRequest {
    /// Broker-owned pipe handle to duplicate into the backend.
    pub pipe_handle: WindowsHandleValue,
    /// Verified backend process ID receiving the duplicated handle.
    pub backend_pid: u32,
    /// One-time token issued at Hello time and registered for an ACK.
    pub token: HandoffToken,
}
70
71impl WindowsHandoffRequest {
72 /// Build inputs for one orchestrated handoff.
73 pub fn new(pipe_handle: WindowsHandleValue, backend_pid: u32, token: HandoffToken) -> Self {
74 Self {
75 pipe_handle,
76 backend_pid,
77 token,
78 }
79 }
80}
81
/// Delivery channel carrying the duplicated handle value and one-time token
/// from the broker to the backend process.
///
/// The orchestration treats delivery as a pluggable step so the sequencing,
/// token lifecycle, and fallback contract do not depend on any one
/// transport. The module docs name
/// [`WireHandoffDelivery`](super::wire::WireHandoffDelivery) as the
/// production implementation; tests also deliver over the child-helper
/// stdin/stdout protocol.
pub trait HandoffDelivery {
    /// Deliver the duplicated handle value and paired token to the backend.
    ///
    /// The handle value is only meaningful inside the backend's handle
    /// table. A returned error means the backend cannot be assumed to know
    /// about the handle; the orchestrator abandons the handoff and reports
    /// the already-duplicated handle as leaked.
    fn deliver(
        &mut self,
        handle: WindowsHandleValue,
        token: &HandoffToken,
    ) -> Result<(), HandoffDeliveryError>;

    /// Block until the backend acknowledges adopting the handed-off
    /// connection, or until `deadline`.
    ///
    /// The orchestrator computes `deadline` after `deliver` returns, as the
    /// current instant plus the registry's ACK deadline duration.
    ///
    /// Returns the instant the acknowledgement was observed. The
    /// orchestrator still validates that instant against the
    /// [`HandoffAckRegistry`] deadline registered at issuance, so a
    /// delivery channel that misjudges the deadline cannot complete an
    /// overdue handoff.
    fn await_backend_ack(
        &mut self,
        token: &HandoffToken,
        deadline: Instant,
    ) -> Result<Instant, HandoffDeliveryError>;
}
114
/// Errors surfaced by a [`HandoffDelivery`] channel.
///
/// The orchestrator does not inspect the variant: the reported stage is
/// decided by which trait method returned the error, and the error's
/// `to_string()` rendering becomes the fallback's log detail.
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
pub enum HandoffDeliveryError {
    /// The handle value and token could not be delivered to the backend.
    #[error("handoff delivery to backend failed: {detail}")]
    DeliveryFailed {
        /// Human-readable failure detail for logs.
        detail: String,
    },
    /// The backend acknowledgement was not observed before the deadline.
    #[error("backend handoff ACK was not observed: {detail}")]
    AckNotObserved {
        /// Human-readable failure detail for logs.
        detail: String,
    },
}
131
/// Step of the orchestration at which a handoff was abandoned.
///
/// Variants are listed in the order the orchestrator runs the steps.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WindowsHandoffStage {
    /// `DuplicateHandle` into the backend process failed. No handle reached
    /// the backend, so the fallback reports no leaked handle.
    Duplicate,
    /// Delivering the handle value and token to the backend failed. The
    /// duplicated handle is reported as leaked.
    Deliver,
    /// The backend acknowledgement was not observed before the deadline.
    /// The duplicated handle is reported as leaked.
    AwaitAck,
    /// The ACK registry rejected the acknowledgement (overdue or already
    /// expired by a sweep). The duplicated handle is reported as leaked.
    Acknowledge,
}
145
/// A handoff completed end to end: handle duplicated, delivered, and
/// acknowledged before the registry deadline.
///
/// The orchestrator builds this only after the ACK registry accepts the
/// acknowledgement.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompletedWindowsHandoff {
    /// Successful duplication into the backend handle table.
    pub duplicated: DuplicateHandleSuccess,
    /// Timely backend acknowledgement that consumed the one-time token.
    pub acknowledged: AcknowledgedHandoff,
}
155
/// A handoff abandoned at some orchestration stage.
///
/// The one-time token has been revoked and the pending ACK entry removed;
/// the caller must keep using the negotiated `backend_pipe` reconnect path.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WindowsHandoffFallback {
    /// Stage at which the handoff was abandoned.
    pub stage: WindowsHandoffStage,
    /// Silent reconnect fallback decision for the client-visible contract.
    /// A duplication failure supplies its own decision; every later stage
    /// is classified as a backend ACK timeout.
    pub decision: HandoffFallbackDecision,
    /// Handle already duplicated into the backend's handle table when the
    /// failure occurred. The broker cannot close another process's handle
    /// (see the module-level leak contract); it leaks in the backend until
    /// that process exits. `None` when duplication itself failed.
    pub leaked_backend_handle: Option<WindowsHandleValue>,
    /// Human-readable failure detail for logs: the `to_string()` rendering
    /// of the error returned by the failing step.
    pub detail: String,
}
174
/// Outcome of one orchestrated Windows handle-passing handoff.
///
/// Failures are reported as the `FallbackToReconnect` variant rather than
/// through a `Result`, because the orchestration entry points return this
/// type directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WindowsHandoffOutcome {
    /// The backend adopted the connection before the ACK deadline.
    Completed(CompletedWindowsHandoff),
    /// The handoff was abandoned; the client reconnects via `backend_pipe`.
    FallbackToReconnect(WindowsHandoffFallback),
}
183
184impl WindowsHandoffOutcome {
185 /// Return true when the handoff completed end to end.
186 pub fn is_completed(&self) -> bool {
187 matches!(self, Self::Completed(_))
188 }
189
190 /// Return the fallback details when the handoff was abandoned.
191 pub fn fallback(&self) -> Option<&WindowsHandoffFallback> {
192 match self {
193 Self::Completed(_) => None,
194 Self::FallbackToReconnect(fallback) => Some(fallback),
195 }
196 }
197}
198
/// Run one production-shaped Windows handoff with the real
/// `DuplicateHandle` transport.
///
/// The token in `request` must have been issued from `tokens` and
/// registered pending in `acks` (the Hello path does both). On success the
/// token is consumed exactly once; on any failure it is revoked and the
/// outcome degrades to the `backend_pipe` reconnect fallback.
///
/// This is [`execute_windows_handoff_with_transport`] with
/// `try_duplicate_handle` supplied as the duplication transport; all
/// sequencing and fallback handling lives there.
pub fn execute_windows_handoff<D>(
    tokens: &mut HandoffTokenStore,
    acks: &mut HandoffAckRegistry,
    request: &WindowsHandoffRequest,
    delivery: &mut D,
) -> WindowsHandoffOutcome
where
    D: HandoffDelivery + ?Sized,
{
    execute_windows_handoff_with_transport(tokens, acks, request, try_duplicate_handle, delivery)
}
217
/// Run one production-shaped Windows handoff against a verified backend.
///
/// The backend pid is read from the verified daemon identity held by
/// [`BackendHandle`](crate::broker::backend_handle::BackendHandle) (the
/// #363 identity bridge), and the duplication step is routed through
/// [`BackendHandle::try_duplicate_windows_handoff_handle`](crate::broker::backend_handle::BackendHandle::try_duplicate_windows_handoff_handle)
/// instead of the free-standing transport. Everything after duplication
/// follows the shared orchestration sequence.
#[cfg(windows)]
pub fn execute_verified_windows_handoff<D>(
    backend: &crate::broker::backend_handle::BackendHandle,
    pipe_handle: WindowsHandleValue,
    token: HandoffToken,
    tokens: &mut HandoffTokenStore,
    acks: &mut HandoffAckRegistry,
    delivery: &mut D,
) -> WindowsHandoffOutcome
where
    D: HandoffDelivery + ?Sized,
{
    let verified_pid = backend.daemon_process.pid;
    let request = WindowsHandoffRequest::new(pipe_handle, verified_pid, token);

    // Only the pipe handle and token are forwarded from the attempt; the
    // verified backend supplies the target process itself.
    let duplicate_via_backend = |attempt: &DuplicateHandleAttempt| -> DuplicateHandleResult {
        backend.try_duplicate_windows_handoff_handle(attempt.pipe_handle, attempt.handoff_token)
    };

    execute_windows_handoff_with_transport(
        tokens,
        acks,
        &request,
        duplicate_via_backend,
        delivery,
    )
}
248
249/// Run one orchestrated handoff with an explicit duplication transport.
250///
251/// Platform-neutral tests inject a mock transport here; production callers
252/// use [`execute_windows_handoff`] or the Windows-only
253/// `execute_verified_windows_handoff`.
254pub fn execute_windows_handoff_with_transport<T, D>(
255 tokens: &mut HandoffTokenStore,
256 acks: &mut HandoffAckRegistry,
257 request: &WindowsHandoffRequest,
258 transport: T,
259 delivery: &mut D,
260) -> WindowsHandoffOutcome
261where
262 T: FnOnce(&DuplicateHandleAttempt) -> DuplicateHandleResult,
263 D: HandoffDelivery + ?Sized,
264{
265 let attempt =
266 DuplicateHandleAttempt::new(request.pipe_handle, request.backend_pid, request.token);
267 let duplicated = match transport(&attempt) {
268 Ok(success) => success,
269 Err(error) => {
270 abandon_pending(acks, tokens, &request.token);
271 return abandoned(
272 WindowsHandoffStage::Duplicate,
273 error.fallback_decision(),
274 None,
275 error.to_string(),
276 );
277 }
278 };
279 let backend_handle = duplicated.duplicated_handle;
280
281 if let Err(error) = delivery.deliver(backend_handle, &request.token) {
282 abandon_pending(acks, tokens, &request.token);
283 return abandoned(
284 WindowsHandoffStage::Deliver,
285 // The backend never adopts the connection, so the client-visible
286 // classification is the same as a missing acknowledgement.
287 HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
288 Some(backend_handle),
289 error.to_string(),
290 );
291 }
292
293 let deadline = ack_deadline_from(acks, Instant::now());
294 let acknowledged_at = match delivery.await_backend_ack(&request.token, deadline) {
295 Ok(at) => at,
296 Err(error) => {
297 abandon_pending(acks, tokens, &request.token);
298 return abandoned(
299 WindowsHandoffStage::AwaitAck,
300 HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
301 Some(backend_handle),
302 error.to_string(),
303 );
304 }
305 };
306
307 match acks.acknowledge(tokens, &request.token, acknowledged_at) {
308 Ok(acknowledged) => WindowsHandoffOutcome::Completed(CompletedWindowsHandoff {
309 duplicated,
310 acknowledged,
311 }),
312 Err(error) => {
313 // AckDeadlineExceeded already revoked the token; TokenNotPending
314 // means a sweep expired it. Revoke defensively either way so no
315 // error path can leave the one-time token presentable.
316 tokens.revoke(&request.token);
317 abandoned(
318 WindowsHandoffStage::Acknowledge,
319 HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
320 Some(backend_handle),
321 error.to_string(),
322 )
323 }
324 }
325}
326
/// Abandon one pending handoff: drop the pending ACK entry and revoke the
/// one-time token so a late backend presentation is rejected.
///
/// Thin wrapper over `HandoffAckRegistry::abandon`; its return value is
/// passed through unchanged. The orchestrator calls this for its side
/// effects only and discards the result.
// NOTE(review): the returned `Option` is presumably the removed pending
// entry, `None` when nothing was pending — confirm against the registry.
fn abandon_pending(
    acks: &mut HandoffAckRegistry,
    tokens: &mut HandoffTokenStore,
    token: &HandoffToken,
) -> Option<PendingHandoffBackend> {
    acks.abandon(tokens, token)
}
336
337fn abandoned(
338 stage: WindowsHandoffStage,
339 decision: HandoffFallbackDecision,
340 leaked_backend_handle: Option<WindowsHandleValue>,
341 detail: String,
342) -> WindowsHandoffOutcome {
343 WindowsHandoffOutcome::FallbackToReconnect(WindowsHandoffFallback {
344 stage,
345 decision,
346 leaked_backend_handle,
347 detail,
348 })
349}
350
351fn ack_deadline_from(acks: &HandoffAckRegistry, now: Instant) -> Instant {
352 now.checked_add(acks.ack_deadline()).unwrap_or(now)
353}