1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//! Copyright 2026 0xClandestine, Ekryski, TheTom, Ambisphaeric
//! SPDX-License-Identifier: Apache-2.0
/// Summary statistics for a set of GPU timing measurements.
///
/// Every field is expressed in microseconds except `cv_pct`, which is a
/// percentage. Values are produced by [`BenchStats::from_samples`].
#[derive(Debug, Clone)]
pub struct BenchStats {
    /// Minimum (best) GPU execution time in microseconds. Represents the
    /// steady-state runtime once DVFS has settled; preferred for throughput
    /// reporting because slow tail samples are leftover ramp / scheduler noise.
    pub min_us: f64,
    /// Mean GPU execution time in microseconds.
    pub mean_us: f64,
    /// Median (p50) GPU execution time in microseconds. For an even number of
    /// samples this is the upper of the two middle samples (no interpolation).
    pub median_us: f64,
    /// 95th-percentile GPU execution time in microseconds (nearest-rank on the
    /// sorted samples, no interpolation).
    pub p95_us: f64,
    /// 99th-percentile GPU execution time in microseconds (nearest-rank on the
    /// sorted samples, no interpolation).
    pub p99_us: f64,
    /// Population standard deviation (divides by `n`) in microseconds.
    pub stddev_us: f64,
    /// Coefficient of variation (stddev/mean × 100). >5% suggests instability.
    /// Reported as `0.0` when the mean is not strictly positive.
    pub cv_pct: f64,
}

impl BenchStats {
    /// Computes summary statistics from raw timing samples (microseconds).
    ///
    /// The samples are sorted ascending with [`f64::total_cmp`], which is a
    /// true total order even when a NaN is present: a positive NaN sorts after
    /// every number and a negative NaN before every number, so the order
    /// statistics stay deterministic instead of depending on where the NaN
    /// happened to sit in the input. A NaN sample still propagates into
    /// `mean_us` and `stddev_us`.
    ///
    /// # Panics
    ///
    /// Panics if `samples` is empty.
    #[must_use]
    pub fn from_samples(mut samples: Vec<f64>) -> Self {
        assert!(!samples.is_empty());

        // `partial_cmp(..).unwrap_or(Equal)` is not a total order once a NaN
        // shows up, which leaves the sorted order unspecified; `total_cmp` is.
        // Values that compare equal under `total_cmp` are bit-identical, so an
        // unstable sort cannot change the result.
        samples.sort_unstable_by(f64::total_cmp);

        let n = samples.len();
        let count = n as f64;

        // Nearest-rank percentile: index `n * pct / 100`, clamped to the last
        // sample so small sample counts never index out of bounds.
        let nearest_rank = |pct: usize| samples[(n * pct / 100).min(n - 1)];

        let mean = samples.iter().sum::<f64>() / count;
        let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / count;
        let stddev = variance.sqrt();

        BenchStats {
            min_us: samples[0],
            mean_us: mean,
            // Upper median: for even `n` this picks the larger middle sample.
            median_us: samples[n / 2],
            p95_us: nearest_rank(95),
            p99_us: nearest_rank(99),
            stddev_us: stddev,
            // Guard the division; a zero, negative or NaN mean yields 0.
            cv_pct: if mean > 0.0 { stddev / mean * 100.0 } else { 0.0 },
        }
    }

    /// True if timing data came from a real GPU dispatch, which is detected
    /// here purely as `mean_us > 0.0`.
    ///
    /// NOTE(review): the original comment states that non-macOS builds always
    /// return false; that depends on how callers produce samples, which is not
    /// visible in this file — confirm against the call sites.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.mean_us > 0.0
    }
}
#[cfg(test)]
mod tests {
    use super::BenchStats;

    /// Builds a timed window of `slow` samples at 250 µs followed by `fast`
    /// samples at 120 µs.
    fn bimodal_window(slow: usize, fast: usize) -> Vec<f64> {
        let mut window = vec![250.0; slow];
        window.resize(slow + fast, 120.0);
        window
    }

    /// Five samples with one large outlier: the median is the middle sample
    /// and both tail percentiles resolve to the largest sample.
    #[test]
    fn computes_median_and_tail_percentiles() {
        let stats = BenchStats::from_samples(vec![1.0, 2.0, 3.0, 4.0, 10.0]);

        assert_eq!(stats.min_us, 1.0);
        assert_eq!(stats.median_us, 3.0);
        assert_eq!(stats.p95_us, 10.0);
        assert_eq!(stats.p99_us, 10.0);
        assert!(stats.stddev_us > 0.0);
    }

    /// Why throughput is reported from the minimum: when the timed window is
    /// split between DVFS ramp (slow) and settled (fast) samples, moving a
    /// single sample across the split is enough to swing the median from one
    /// mode to the other, whereas the minimum does not move at all.
    #[test]
    fn min_is_stable_under_bimodal_split() {
        // Even split (5 slow, 5 fast): the median reports the slow mode.
        let ramp_heavy = BenchStats::from_samples(bimodal_window(5, 5));
        // One sample fewer in the ramp (4 slow, 6 fast): the median reports
        // the fast mode instead.
        let settled_heavy = BenchStats::from_samples(bimodal_window(4, 6));

        let median_gap = ramp_heavy.median_us - settled_heavy.median_us;
        assert!(median_gap >= 100.0);

        // The minimum is the same for both splits.
        assert_eq!(ramp_heavy.min_us, 120.0);
        assert_eq!(settled_heavy.min_us, 120.0);
    }
}