Skip to main content

running_process/broker/server/handoff/
orchestrate.rs

1//! Production-shaped orchestration of one Windows handle-passing handoff
2//! (#354, slice 3).
3//!
4//! This module composes the pieces landed by earlier slices into one
5//! broker-side sequence:
6//!
7//! 1. duplicate the broker-held client pipe into the verified backend
8//!    process ([`super::try_duplicate_handle`], or the verified
9//!    [`BackendHandle`](crate::broker::backend_handle::BackendHandle)
10//!    bridge),
11//! 2. deliver the duplicated handle value plus the one-time token to the
12//!    backend through a [`HandoffDelivery`] implementation,
13//! 3. wait for the backend acknowledgement observed by the delivery
14//!    channel, and
15//! 4. complete the pending entry in the [`HandoffAckRegistry`], consuming
16//!    the one-time token exactly once.
17//!
18//! Any failure at any step abandons the handoff: the one-time token is
19//! revoked, the pending ACK entry is removed, and the caller receives
20//! [`WindowsHandoffOutcome::FallbackToReconnect`]. The negotiated
21//! `backend_pipe` reconnect path stays authoritative; orchestration
22//! failures are silent optimization failures, never client errors, and
23//! this function never panics on transport, delivery, or registry errors.
24//!
25//! # Delivery mechanism
26//!
27//! Delivery of the `(handle value, token)` pair is abstracted behind the
28//! [`HandoffDelivery`] trait so the orchestration sequence, token
29//! lifecycle, and fallback contract stay transport-agnostic. The
30//! production implementation is
31//! [`WireHandoffDelivery`](super::wire::WireHandoffDelivery) (#354 slice
32//! 6), which sends a `HandoffOffer` frame over a framed broker↔backend
33//! control connection and waits for the matching `HandoffAck`; tests also
34//! deliver over the child-helper stdin/stdout protocol from the #358/#363
35//! smoke tests.
36//!
37//! # Handle leak contract
38//!
39//! `DuplicateHandle` places the duplicated handle directly into the
40//! *backend's* handle table. Once duplication has succeeded, the broker
41//! cannot close that handle: closing a handle owned by another process
42//! would require a second `DUPLICATE_CLOSE_SOURCE` round-trip that is not
43//! part of this slice. If delivery or acknowledgement fails after
44//! duplication, the duplicated handle therefore leaks in the backend
45//! process until the backend exits. The outcome records it in
46//! [`WindowsHandoffFallback::leaked_backend_handle`] so callers can log
47//! and monitor the leak honestly instead of pretending cleanup happened.
48
49use std::time::Instant;
50
51use super::ack::{HandoffAckRegistry, PendingHandoffBackend};
52use super::fallback::{HandoffFallbackDecision, HandoffFallbackReason};
53use super::handoff_token::{HandoffToken, HandoffTokenStore};
54use super::windows::{
55    try_duplicate_handle, DuplicateHandleAttempt, DuplicateHandleResult, DuplicateHandleSuccess,
56    WindowsHandleValue,
57};
58use super::AcknowledgedHandoff;
59
/// Inputs for one orchestrated Windows handle-passing handoff.
///
/// The orchestrator copies all three fields into a
/// [`DuplicateHandleAttempt`] for the duplication transport, and reuses
/// `token` for delivery, acknowledgement, and abandonment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct WindowsHandoffRequest {
    /// Broker-owned pipe handle to duplicate into the backend.
    pub pipe_handle: WindowsHandleValue,
    /// Verified backend process ID receiving the duplicated handle.
    pub backend_pid: u32,
    /// One-time token issued at Hello time and registered for an ACK.
    pub token: HandoffToken,
}

impl WindowsHandoffRequest {
    /// Build inputs for one orchestrated handoff.
    ///
    /// Performs no validation: the three values are stored exactly as
    /// given.
    pub fn new(pipe_handle: WindowsHandleValue, backend_pid: u32, token: HandoffToken) -> Self {
        Self {
            pipe_handle,
            backend_pid,
            token,
        }
    }
}
81
/// Delivery channel carrying the duplicated handle value and one-time token
/// from the broker to the backend process.
///
/// The production implementation is
/// [`WireHandoffDelivery`](super::wire::WireHandoffDelivery) (see the module
/// docs); the orchestration treats delivery as a pluggable step so the
/// sequencing and fallback contract do not depend on the transport.
///
/// The orchestrator calls [`deliver`](Self::deliver) once, after duplication
/// has succeeded, and calls [`await_backend_ack`](Self::await_backend_ack)
/// only when `deliver` returned `Ok`.
pub trait HandoffDelivery {
    /// Deliver the duplicated handle value and paired token to the backend.
    ///
    /// The handle value is only meaningful inside the backend's handle
    /// table. A returned error means the backend cannot be assumed to know
    /// about the handle; the orchestrator abandons the handoff.
    ///
    /// # Errors
    ///
    /// Any error abandons the handoff at [`WindowsHandoffStage::Deliver`].
    fn deliver(
        &mut self,
        handle: WindowsHandleValue,
        token: &HandoffToken,
    ) -> Result<(), HandoffDeliveryError>;

    /// Block until the backend acknowledges adopting the handed-off
    /// connection, or until `deadline`.
    ///
    /// Returns the instant the acknowledgement was observed. The
    /// orchestrator still validates that instant against the
    /// [`HandoffAckRegistry`] deadline registered at issuance, so a
    /// delivery channel that misjudges the deadline cannot complete an
    /// overdue handoff.
    ///
    /// # Errors
    ///
    /// Any error abandons the handoff at [`WindowsHandoffStage::AwaitAck`].
    fn await_backend_ack(
        &mut self,
        token: &HandoffToken,
        deadline: Instant,
    ) -> Result<Instant, HandoffDeliveryError>;
}
114
/// Errors surfaced by a [`HandoffDelivery`] channel.
///
/// The orchestrator does not branch on the variant. An error returned by
/// [`HandoffDelivery::deliver`] abandons the handoff at
/// [`WindowsHandoffStage::Deliver`]; an error returned by
/// [`HandoffDelivery::await_backend_ack`] abandons it at
/// [`WindowsHandoffStage::AwaitAck`]. In both cases the error's
/// `to_string()` output becomes [`WindowsHandoffFallback::detail`].
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
pub enum HandoffDeliveryError {
    /// The handle value and token could not be delivered to the backend.
    #[error("handoff delivery to backend failed: {detail}")]
    DeliveryFailed {
        /// Human-readable failure detail for logs.
        detail: String,
    },
    /// The backend acknowledgement was not observed before the deadline.
    #[error("backend handoff ACK was not observed: {detail}")]
    AckNotObserved {
        /// Human-readable failure detail for logs.
        detail: String,
    },
}
131
/// Step of the orchestration at which a handoff was abandoned.
///
/// Variants are listed in the order the orchestrator runs the steps. Only
/// [`Duplicate`](Self::Duplicate) is reported with no leaked backend
/// handle; every later stage is reported with the duplicated handle value
/// recorded in [`WindowsHandoffFallback::leaked_backend_handle`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WindowsHandoffStage {
    /// `DuplicateHandle` into the backend process failed.
    Duplicate,
    /// Delivering the handle value and token to the backend failed.
    Deliver,
    /// The backend acknowledgement was not observed before the deadline.
    AwaitAck,
    /// The ACK registry rejected the acknowledgement (overdue or already
    /// expired by a sweep).
    Acknowledge,
}
145
/// A handoff completed end to end: handle duplicated, delivered, and
/// acknowledged before the registry deadline.
///
/// The orchestrator builds this only after the ACK registry accepted the
/// acknowledgement.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompletedWindowsHandoff {
    /// Successful duplication into the backend handle table.
    pub duplicated: DuplicateHandleSuccess,
    /// Timely backend acknowledgement that consumed the one-time token.
    pub acknowledged: AcknowledgedHandoff,
}
155
/// A handoff abandoned at some orchestration stage.
///
/// The one-time token has been revoked and the pending ACK entry removed;
/// the caller must keep using the negotiated `backend_pipe` reconnect path.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WindowsHandoffFallback {
    /// Stage at which the handoff was abandoned.
    pub stage: WindowsHandoffStage,
    /// Silent reconnect fallback decision for the client-visible contract.
    ///
    /// For [`WindowsHandoffStage::Duplicate`] this is the duplication
    /// error's own `fallback_decision()`; for every later stage the
    /// orchestrator uses [`HandoffFallbackReason::BackendAckTimeout`].
    pub decision: HandoffFallbackDecision,
    /// Handle already duplicated into the backend's handle table when the
    /// failure occurred. The broker cannot close another process's handle
    /// (see the module-level leak contract); it leaks in the backend until
    /// that process exits. `None` when duplication itself failed.
    pub leaked_backend_handle: Option<WindowsHandleValue>,
    /// Human-readable failure detail for logs: the `to_string()` output of
    /// the error that caused the handoff to be abandoned.
    pub detail: String,
}
174
175/// Outcome of one orchestrated Windows handle-passing handoff.
176#[derive(Clone, Debug, PartialEq, Eq)]
177pub enum WindowsHandoffOutcome {
178    /// The backend adopted the connection before the ACK deadline.
179    Completed(CompletedWindowsHandoff),
180    /// The handoff was abandoned; the client reconnects via `backend_pipe`.
181    FallbackToReconnect(WindowsHandoffFallback),
182}
183
184impl WindowsHandoffOutcome {
185    /// Return true when the handoff completed end to end.
186    pub fn is_completed(&self) -> bool {
187        matches!(self, Self::Completed(_))
188    }
189
190    /// Return the fallback details when the handoff was abandoned.
191    pub fn fallback(&self) -> Option<&WindowsHandoffFallback> {
192        match self {
193            Self::Completed(_) => None,
194            Self::FallbackToReconnect(fallback) => Some(fallback),
195        }
196    }
197}
198
/// Run one production-shaped Windows handoff with the real
/// `DuplicateHandle` transport.
///
/// The token in `request` must have been issued from `tokens` and
/// registered pending in `acks` (the Hello path does both). On success the
/// token is consumed exactly once; on any failure it is revoked and the
/// outcome degrades to the `backend_pipe` reconnect fallback.
///
/// This is [`execute_windows_handoff_with_transport`] with
/// [`try_duplicate_handle`] supplied as the duplication transport; all
/// sequencing and cleanup happen there.
pub fn execute_windows_handoff<D>(
    tokens: &mut HandoffTokenStore,
    acks: &mut HandoffAckRegistry,
    request: &WindowsHandoffRequest,
    delivery: &mut D,
) -> WindowsHandoffOutcome
where
    D: HandoffDelivery + ?Sized,
{
    // Delegate unchanged; only the transport is fixed by this entry point.
    execute_windows_handoff_with_transport(tokens, acks, request, try_duplicate_handle, delivery)
}
217
/// Run one production-shaped Windows handoff against a verified backend.
///
/// Builds on the
/// [`BackendHandle`](crate::broker::backend_handle::BackendHandle) identity
/// bridge from #363: the target pid is read from the verified daemon
/// identity (`backend.daemon_process.pid`) rather than supplied by the
/// caller, and the duplication step is routed through
/// [`BackendHandle::try_duplicate_windows_handoff_handle`](crate::broker::backend_handle::BackendHandle::try_duplicate_windows_handoff_handle).
/// Sequencing, token lifecycle, and fallback handling are those of
/// [`execute_windows_handoff_with_transport`].
#[cfg(windows)]
pub fn execute_verified_windows_handoff<D>(
    backend: &crate::broker::backend_handle::BackendHandle,
    pipe_handle: WindowsHandleValue,
    token: HandoffToken,
    tokens: &mut HandoffTokenStore,
    acks: &mut HandoffAckRegistry,
    delivery: &mut D,
) -> WindowsHandoffOutcome
where
    D: HandoffDelivery + ?Sized,
{
    let verified_pid = backend.daemon_process.pid;
    let request = WindowsHandoffRequest::new(pipe_handle, verified_pid, token);

    // Duplication goes through the verified backend bridge instead of the
    // free-standing transport.
    let duplicate_via_backend = |attempt: &DuplicateHandleAttempt| {
        backend.try_duplicate_windows_handoff_handle(attempt.pipe_handle, attempt.handoff_token)
    };

    execute_windows_handoff_with_transport(tokens, acks, &request, duplicate_via_backend, delivery)
}
248
249/// Run one orchestrated handoff with an explicit duplication transport.
250///
251/// Platform-neutral tests inject a mock transport here; production callers
252/// use [`execute_windows_handoff`] or the Windows-only
253/// `execute_verified_windows_handoff`.
254pub fn execute_windows_handoff_with_transport<T, D>(
255    tokens: &mut HandoffTokenStore,
256    acks: &mut HandoffAckRegistry,
257    request: &WindowsHandoffRequest,
258    transport: T,
259    delivery: &mut D,
260) -> WindowsHandoffOutcome
261where
262    T: FnOnce(&DuplicateHandleAttempt) -> DuplicateHandleResult,
263    D: HandoffDelivery + ?Sized,
264{
265    let attempt =
266        DuplicateHandleAttempt::new(request.pipe_handle, request.backend_pid, request.token);
267    let duplicated = match transport(&attempt) {
268        Ok(success) => success,
269        Err(error) => {
270            abandon_pending(acks, tokens, &request.token);
271            return abandoned(
272                WindowsHandoffStage::Duplicate,
273                error.fallback_decision(),
274                None,
275                error.to_string(),
276            );
277        }
278    };
279    let backend_handle = duplicated.duplicated_handle;
280
281    if let Err(error) = delivery.deliver(backend_handle, &request.token) {
282        abandon_pending(acks, tokens, &request.token);
283        return abandoned(
284            WindowsHandoffStage::Deliver,
285            // The backend never adopts the connection, so the client-visible
286            // classification is the same as a missing acknowledgement.
287            HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
288            Some(backend_handle),
289            error.to_string(),
290        );
291    }
292
293    let deadline = ack_deadline_from(acks, Instant::now());
294    let acknowledged_at = match delivery.await_backend_ack(&request.token, deadline) {
295        Ok(at) => at,
296        Err(error) => {
297            abandon_pending(acks, tokens, &request.token);
298            return abandoned(
299                WindowsHandoffStage::AwaitAck,
300                HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
301                Some(backend_handle),
302                error.to_string(),
303            );
304        }
305    };
306
307    match acks.acknowledge(tokens, &request.token, acknowledged_at) {
308        Ok(acknowledged) => WindowsHandoffOutcome::Completed(CompletedWindowsHandoff {
309            duplicated,
310            acknowledged,
311        }),
312        Err(error) => {
313            // AckDeadlineExceeded already revoked the token; TokenNotPending
314            // means a sweep expired it. Revoke defensively either way so no
315            // error path can leave the one-time token presentable.
316            tokens.revoke(&request.token);
317            abandoned(
318                WindowsHandoffStage::Acknowledge,
319                HandoffFallbackDecision::new(HandoffFallbackReason::BackendAckTimeout),
320                Some(backend_handle),
321                error.to_string(),
322            )
323        }
324    }
325}
326
/// Abandon one pending handoff: drop the pending ACK entry and revoke the
/// one-time token so a late backend presentation is rejected.
///
/// This is a direct pass-through to [`HandoffAckRegistry::abandon`]; its
/// return value is forwarded unchanged. The orchestrator in this module
/// ignores that return value.
// NOTE(review): the removal and revocation described above are performed by
// `HandoffAckRegistry::abandon`, which is defined outside this file;
// confirm its contract there.
fn abandon_pending(
    acks: &mut HandoffAckRegistry,
    tokens: &mut HandoffTokenStore,
    token: &HandoffToken,
) -> Option<PendingHandoffBackend> {
    acks.abandon(tokens, token)
}
336
337fn abandoned(
338    stage: WindowsHandoffStage,
339    decision: HandoffFallbackDecision,
340    leaked_backend_handle: Option<WindowsHandleValue>,
341    detail: String,
342) -> WindowsHandoffOutcome {
343    WindowsHandoffOutcome::FallbackToReconnect(WindowsHandoffFallback {
344        stage,
345        decision,
346        leaked_backend_handle,
347        detail,
348    })
349}
350
351fn ack_deadline_from(acks: &HandoffAckRegistry, now: Instant) -> Instant {
352    now.checked_add(acks.ack_deadline()).unwrap_or(now)
353}