ruvector_profiler/
latency.rs1#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2pub struct LatencyRecord {
3 pub sample_id: usize,
4 pub wall_time_us: u64,
5 pub kernel_time_us: u64,
6 pub seq_len: usize,
7}
8
9#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
10pub struct LatencyStats {
11 pub p50_us: u64,
12 pub p95_us: u64,
13 pub p99_us: u64,
14 pub mean_us: f64,
15 pub std_us: f64,
16 pub n: usize,
17}
18
19pub fn compute_latency_stats(records: &[LatencyRecord]) -> LatencyStats {
21 let n = records.len();
22 if n == 0 {
23 return LatencyStats { p50_us: 0, p95_us: 0, p99_us: 0, mean_us: 0.0, std_us: 0.0, n: 0 };
24 }
25 let mut times: Vec<u64> = records.iter().map(|r| r.wall_time_us).collect();
26 times.sort_unstable();
27 let mean = times.iter().sum::<u64>() as f64 / n as f64;
28 let var = times.iter().map(|&t| (t as f64 - mean).powi(2)).sum::<f64>() / n as f64;
29 LatencyStats {
30 p50_us: pctl(×, 50.0), p95_us: pctl(×, 95.0), p99_us: pctl(×, 99.0),
31 mean_us: mean, std_us: var.sqrt(), n,
32 }
33}
34
/// Returns the `p`-th percentile of `sorted` using the nearest-rank method:
/// the element at 1-based rank `ceil(p / 100 * len)`, clamped to `1..=len`.
///
/// `sorted` must already be in ascending order. Values of `p` at or below 0
/// yield the first element and values at or above 100 yield the last.
///
/// # Panics
///
/// Panics if `sorted` is empty.
fn pctl(sorted: &[u64], p: f64) -> u64 {
    // Multiply before dividing. `p / 100.0` is usually not exactly
    // representable, and scaling that rounded quotient by the length can
    // land just above an integer (7.0 / 100.0 * 100.0 == 7.000000000000001),
    // which `ceil` would then bump to the next rank.
    let rank = (p * sorted.len() as f64 / 100.0).ceil() as usize;
    let idx = rank.min(sorted.len()).saturating_sub(1);
    sorted[idx]
}
39
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one record per timing in `ts`, using the timing for both the
    /// wall and kernel fields and a fixed sequence length.
    fn recs(ts: &[u64]) -> Vec<LatencyRecord> {
        ts.iter()
            .enumerate()
            .map(|(i, &t)| LatencyRecord {
                sample_id: i,
                wall_time_us: t,
                kernel_time_us: t,
                seq_len: 128,
            })
            .collect()
    }

    #[test]
    fn empty() {
        let s = compute_latency_stats(&[]);
        assert_eq!(s.n, 0);
        assert_eq!((s.p50_us, s.p95_us, s.p99_us), (0, 0, 0));
        assert_eq!((s.mean_us, s.std_us), (0.0, 0.0));
    }

    #[test]
    fn single() {
        let s = compute_latency_stats(&recs(&[42]));
        assert_eq!((s.p50_us, s.p99_us, s.n), (42, 42, 1));
    }

    #[test]
    fn multi() {
        let s = compute_latency_stats(&recs(&[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]));
        assert_eq!(s.p50_us, 50);
        assert_eq!((s.p95_us, s.p99_us), (100, 100));
        assert!((s.mean_us - 55.0).abs() < 1e-9);
        // Population variance of 10, 20, ..., 100 is 825.
        assert!((s.std_us - 825f64.sqrt()).abs() < 1e-9);
    }

    #[test]
    fn unsorted() {
        // Input order must not matter: the function sorts its own copy.
        assert_eq!(compute_latency_stats(&recs(&[100, 10, 50, 90, 20])).p50_us, 50);
    }
}