Skip to main content

running_process/broker/server/handoff/
latency.rs

1//! Latency comparison helpers for Phase 6 handoff validation.
2//!
3//! Handoff is only worth enabling when the platform transfer path is faster
4//! than the reconnect fallback it replaces. This module keeps that comparison
5//! deterministic and testable; callers can feed real measurements when the
6//! end-to-end handoff path is wired into a perf run.
7
8use std::time::Duration;
9
/// Gather a latency sample set after discarding warm-up runs.
///
/// `sample` is invoked once per iteration (`warmup + iterations` calls in
/// total) and returns the duration of the region the caller timed. Callers are
/// expected to time that region with [`std::time::Instant`], a monotonic
/// clock, so wall-clock adjustments cannot skew the samples.
///
/// The results of the first `warmup` calls are thrown away so that cold
/// caches, lazy allocations, and first-connection costs do not distort the
/// measured distribution. The remaining `iterations` results are returned in
/// the order they were produced.
pub fn collect_latency_samples<F>(warmup: usize, iterations: usize, mut sample: F) -> Vec<Duration>
where
    F: FnMut() -> Duration,
{
    // Warm-up pass: run the closure for its side effects only.
    (0..warmup).for_each(|_| {
        let _discarded = sample();
    });

    // Measured pass: keep exactly `iterations` results, in call order.
    std::iter::repeat_with(&mut sample)
        .take(iterations)
        .collect()
}
27
28/// Summarize one measured sample set at the frozen P50/P99 percentiles.
29///
30/// Returns `None` when no samples were collected, so harnesses cannot
31/// report percentiles for an empty run.
32pub fn summarize_latency_samples(samples: &[Duration]) -> Option<HandoffLatencySummary> {
33    summarize_handoff_latencies(samples, EmptySampleSet::Handoff).ok()
34}
35
/// P50/P99 digest of a single handoff latency sample set.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HandoffLatencySummary {
    /// How many samples the percentiles were computed from.
    pub sample_count: usize,
    /// 50th-percentile latency.
    pub p50: Duration,
    /// 99th-percentile latency.
    pub p99: Duration,
}
46
47/// Comparison between optimized handoff and reconnect fallback samples.
48#[derive(Clone, Copy, Debug, PartialEq, Eq)]
49pub struct HandoffLatencyComparison {
50    /// Summary for successful handoff samples.
51    pub handoff: HandoffLatencySummary,
52    /// Summary for reconnect fallback samples.
53    pub fallback: HandoffLatencySummary,
54}
55
56impl HandoffLatencyComparison {
57    /// Return the P50 latency saved by handoff over reconnect fallback.
58    pub fn p50_savings(&self) -> Duration {
59        self.fallback.p50.saturating_sub(self.handoff.p50)
60    }
61
62    /// Return the P99 latency saved by handoff over reconnect fallback.
63    pub fn p99_savings(&self) -> Duration {
64        self.fallback.p99.saturating_sub(self.handoff.p99)
65    }
66
67    /// Return true when handoff is strictly faster at both frozen percentiles.
68    pub fn proves_handoff_faster(&self) -> bool {
69        self.handoff.p50 < self.fallback.p50 && self.handoff.p99 < self.fallback.p99
70    }
71}
72
73/// Errors returned when handoff latency does not beat reconnect fallback.
74#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
75pub enum HandoffLatencyError {
76    /// No handoff samples were supplied.
77    #[error("handoff latency comparison received no handoff samples")]
78    EmptyHandoffSamples,
79    /// No reconnect fallback samples were supplied.
80    #[error("handoff latency comparison received no fallback samples")]
81    EmptyFallbackSamples,
82    /// Handoff P50 was not faster than reconnect fallback P50.
83    #[error(
84        "handoff P50 was not faster than fallback: handoff {handoff:?}, fallback {fallback:?}"
85    )]
86    P50NotFaster {
87        /// Handoff P50.
88        handoff: Duration,
89        /// Reconnect fallback P50.
90        fallback: Duration,
91    },
92    /// Handoff P99 was not faster than reconnect fallback P99.
93    #[error(
94        "handoff P99 was not faster than fallback: handoff {handoff:?}, fallback {fallback:?}"
95    )]
96    P99NotFaster {
97        /// Handoff P99.
98        handoff: Duration,
99        /// Reconnect fallback P99.
100        fallback: Duration,
101    },
102}
103
104/// Compare measured handoff samples against reconnect fallback samples.
105///
106/// The comparison requires handoff to be strictly faster at both P50 and P99.
107/// This prevents Phase 6 from treating equal or slower handle passing as a
108/// successful optimization.
109pub fn compare_handoff_latency(
110    handoff_samples: &[Duration],
111    fallback_samples: &[Duration],
112) -> Result<HandoffLatencyComparison, HandoffLatencyError> {
113    let handoff = summarize_handoff_latencies(handoff_samples, EmptySampleSet::Handoff)?;
114    let fallback = summarize_handoff_latencies(fallback_samples, EmptySampleSet::Fallback)?;
115    let comparison = HandoffLatencyComparison { handoff, fallback };
116
117    if comparison.handoff.p50 >= comparison.fallback.p50 {
118        return Err(HandoffLatencyError::P50NotFaster {
119            handoff: comparison.handoff.p50,
120            fallback: comparison.fallback.p50,
121        });
122    }
123    if comparison.handoff.p99 >= comparison.fallback.p99 {
124        return Err(HandoffLatencyError::P99NotFaster {
125            handoff: comparison.handoff.p99,
126            fallback: comparison.fallback.p99,
127        });
128    }
129
130    Ok(comparison)
131}
132
/// Tags which sample set is being summarized, so an empty input can be mapped
/// to the matching error variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EmptySampleSet {
    /// The samples are handoff measurements.
    Handoff,
    /// The samples are reconnect fallback measurements.
    Fallback,
}
138
139fn summarize_handoff_latencies(
140    samples: &[Duration],
141    empty: EmptySampleSet,
142) -> Result<HandoffLatencySummary, HandoffLatencyError> {
143    if samples.is_empty() {
144        return Err(match empty {
145            EmptySampleSet::Handoff => HandoffLatencyError::EmptyHandoffSamples,
146            EmptySampleSet::Fallback => HandoffLatencyError::EmptyFallbackSamples,
147        });
148    }
149
150    let mut sorted = samples.to_vec();
151    sorted.sort_unstable();
152    Ok(HandoffLatencySummary {
153        sample_count: sorted.len(),
154        p50: percentile_nearest_rank(&sorted, 50),
155        p99: percentile_nearest_rank(&sorted, 99),
156    })
157}
158
/// Look up a percentile in an ascending slice using the nearest-rank method.
///
/// The one-based rank is `ceil(len * percentile / 100)`; the element at that
/// rank is returned. `sorted` must be non-empty and already sorted, and
/// `percentile` must lie in `1..=100` (both checked only in debug builds).
fn percentile_nearest_rank(sorted: &[Duration], percentile: usize) -> Duration {
    debug_assert!(!sorted.is_empty());
    debug_assert!((1..=100).contains(&percentile));

    let len = sorted.len();
    let one_based_rank = (len * percentile).div_ceil(100);
    // Convert to a zero-based position without underflowing on rank 0.
    let position = one_based_rank.saturating_sub(1);
    // Clamp to the last element so an oversized rank cannot index past the end.
    let last = len - 1;
    sorted[position.min(last)]
}